diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp index 780a3569afdbe..c24b8553999cc 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp @@ -155,17 +155,26 @@ void ClangTidyCheck::OptionsView::store(ClangTidyOptions::OptionMap &Options, Options[NamePrefix + LocalName.str()] = Value; } -void ClangTidyCheck::OptionsView::store(ClangTidyOptions::OptionMap &Options, - StringRef LocalName, - int64_t Value) const { +void ClangTidyCheck::OptionsView::storeInt(ClangTidyOptions::OptionMap &Options, + StringRef LocalName, + int64_t Value) const { store(Options, LocalName, llvm::itostr(Value)); } -llvm::Expected ClangTidyCheck::OptionsView::getEnumInt( - StringRef LocalName, ArrayRef> Mapping, - bool CheckGlobal, bool IgnoreCase) { - auto Iter = CheckGlobal ? findPriorityOption(CheckOptions, NamePrefix, LocalName) - : CheckOptions.find((NamePrefix + LocalName).str()); +template <> +void ClangTidyCheck::OptionsView::store( + ClangTidyOptions::OptionMap &Options, StringRef LocalName, + bool Value) const { + store(Options, LocalName, Value ? StringRef("true") : StringRef("false")); +} + +llvm::Expected +ClangTidyCheck::OptionsView::getEnumInt(StringRef LocalName, + ArrayRef Mapping, + bool CheckGlobal, bool IgnoreCase) { + auto Iter = CheckGlobal + ? 
findPriorityOption(CheckOptions, NamePrefix, LocalName) + : CheckOptions.find((NamePrefix + LocalName).str()); if (Iter == CheckOptions.end()) return llvm::make_error((NamePrefix + LocalName).str()); @@ -174,19 +183,19 @@ llvm::Expected ClangTidyCheck::OptionsView::getEnumInt( unsigned EditDistance = -1; for (const auto &NameAndEnum : Mapping) { if (IgnoreCase) { - if (Value.equals_lower(NameAndEnum.first)) - return NameAndEnum.second; - } else if (Value.equals(NameAndEnum.first)) { - return NameAndEnum.second; - } else if (Value.equals_lower(NameAndEnum.first)) { - Closest = NameAndEnum.first; + if (Value.equals_lower(NameAndEnum.second)) + return NameAndEnum.first; + } else if (Value.equals(NameAndEnum.second)) { + return NameAndEnum.first; + } else if (Value.equals_lower(NameAndEnum.second)) { + Closest = NameAndEnum.second; EditDistance = 0; continue; } - unsigned Distance = Value.edit_distance(NameAndEnum.first); + unsigned Distance = Value.edit_distance(NameAndEnum.second); if (Distance < EditDistance) { EditDistance = Distance; - Closest = NameAndEnum.first; + Closest = NameAndEnum.second; } } if (EditDistance < 3) diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h index dfe01a8aaa30f..54b7251267524 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h @@ -26,6 +26,13 @@ class SourceManager; namespace tidy { +/// This class should be specialized by any enum type that needs to be converted +/// to and from an \ref llvm::StringRef. +template struct OptionEnumMapping { + // Specializations of this struct must implement this function. 
+ static ArrayRef> getEnumMapping() = delete; +}; + template class OptionError : public llvm::ErrorInfo { std::error_code convertToErrorCode() const override { return llvm::inconvertibleErrorCode(); @@ -312,36 +319,38 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from the /// ``CheckOptions``. If the corresponding key is not present, returns a /// ``MissingOptionError``. If the key can't be parsed as a ``T`` returns a /// ``UnparseableEnumOptionError``. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. template std::enable_if_t::value, llvm::Expected> - get(StringRef LocalName, ArrayRef> Mapping, - bool IgnoreCase = false) { - if (llvm::Expected ValueOr = getEnumInt( - LocalName, typeEraseMapping(Mapping), false, IgnoreCase)) + get(StringRef LocalName, bool IgnoreCase = false) { + if (llvm::Expected ValueOr = + getEnumInt(LocalName, typeEraseMapping(), false, IgnoreCase)) return static_cast(*ValueOr); else return std::move(ValueOr.takeError()); } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from the /// ``CheckOptions``. If the corresponding key is not present or it can't be /// parsed as a ``T``, returns \p Default. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. 
template std::enable_if_t::value, T> - get(StringRef LocalName, ArrayRef> Mapping, - T Default, bool IgnoreCase = false) { - if (auto ValueOr = get(LocalName, Mapping, IgnoreCase)) + get(StringRef LocalName, T Default, bool IgnoreCase = false) { + if (auto ValueOr = get(LocalName, IgnoreCase)) return *ValueOr; else logErrToStdErr(ValueOr.takeError()); @@ -349,40 +358,41 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from local or /// global ``CheckOptions``. Gets local option first. If local is not /// present, falls back to get global option. If global option is not /// present either, returns a ``MissingOptionError``. If the key can't be /// parsed as a ``T`` returns a ``UnparseableEnumOptionError``. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. template std::enable_if_t::value, llvm::Expected> getLocalOrGlobal(StringRef LocalName, - ArrayRef> Mapping, bool IgnoreCase = false) { - if (llvm::Expected ValueOr = getEnumInt( - LocalName, typeEraseMapping(Mapping), true, IgnoreCase)) + if (llvm::Expected ValueOr = + getEnumInt(LocalName, typeEraseMapping(), true, IgnoreCase)) return static_cast(*ValueOr); else return std::move(ValueOr.takeError()); } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from local or /// global ``CheckOptions``. Gets local option first. If local is not /// present, falls back to get global option. 
If global option is not /// present either or it can't be parsed as a ``T``, returns \p Default. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. template std::enable_if_t::value, T> - getLocalOrGlobal(StringRef LocalName, - ArrayRef> Mapping, T Default, - bool IgnoreCase = false) { - if (auto ValueOr = getLocalOrGlobal(LocalName, Mapping, IgnoreCase)) + getLocalOrGlobal(StringRef LocalName, T Default, bool IgnoreCase = false) { + if (auto ValueOr = getLocalOrGlobal(LocalName, IgnoreCase)) return *ValueOr; else logErrToStdErr(ValueOr.takeError()); @@ -395,26 +405,34 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { StringRef Value) const; /// Stores an option with the check-local name \p LocalName with - /// ``int64_t`` value \p Value to \p Options. - void store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, - int64_t Value) const; + /// integer value \p Value to \p Options. + template + std::enable_if_t::value> + store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, + T Value) const { + storeInt(Options, LocalName, Value); + } /// Stores an option with the check-local name \p LocalName as the string - /// representation of the Enum \p Value using the \p Mapping to \p Options. + /// representation of the Enum \p Value to \p Options. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. 
template std::enable_if_t::value> - store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, T Value, - ArrayRef> Mapping) { + store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, T Value) { + ArrayRef> Mapping = + OptionEnumMapping::getEnumMapping(); auto Iter = llvm::find_if( - Mapping, [&](const std::pair &NameAndEnum) { - return NameAndEnum.second == Value; + Mapping, [&](const std::pair &NameAndEnum) { + return NameAndEnum.first == Value; }); assert(Iter != Mapping.end() && "Unknown Case Value"); - store(Options, LocalName, Iter->first); + store(Options, LocalName, Iter->second); } private: - using NameAndValue = std::pair; + using NameAndValue = std::pair; llvm::Expected getEnumInt(StringRef LocalName, ArrayRef Mapping, @@ -422,16 +440,21 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { template std::enable_if_t::value, std::vector> - typeEraseMapping(ArrayRef> Mapping) { + typeEraseMapping() { + ArrayRef> Mapping = + OptionEnumMapping::getEnumMapping(); std::vector Result; Result.reserve(Mapping.size()); for (auto &MappedItem : Mapping) { - Result.emplace_back(MappedItem.first, - static_cast(MappedItem.second)); + Result.emplace_back(static_cast(MappedItem.first), + MappedItem.second); } return Result; } + void storeInt(ClangTidyOptions::OptionMap &Options, StringRef LocalName, + int64_t Value) const; + static void logErrToStdErr(llvm::Error &&Err); std::string NamePrefix; @@ -493,6 +516,13 @@ template <> bool ClangTidyCheck::OptionsView::getLocalOrGlobal(StringRef LocalName, bool Default) const; +/// Stores an option with the check-local name \p LocalName with +/// bool value \p Value to \p Options. 
+template <> +void ClangTidyCheck::OptionsView::store( + ClangTidyOptions::OptionMap &Options, StringRef LocalName, + bool Value) const; + } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp index df4dbd5ff180d..11bbcbcb527f5 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp @@ -27,7 +27,6 @@ StringFindStartswithCheck::StringFindStartswithCheck(StringRef Name, StringLikeClasses(utils::options::parseStringList( Options.get("StringLikeClasses", "::std::basic_string"))), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), AbseilStringsMatchHeader( Options.get("AbseilStringsMatchHeader", "absl/strings/match.h")) {} @@ -122,8 +121,7 @@ void StringFindStartswithCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "StringLikeClasses", utils::options::serializeStringList(StringLikeClasses)); - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "AbseilStringsMatchHeader", AbseilStringsMatchHeader); } diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp index 2be3bc4ab3cd1..f1755d3f9b855 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp @@ -27,13 +27,11 @@ InitVariablesCheck::InitVariablesCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), MathHeader(Options.get("MathHeader", 
"math.h")) {} void InitVariablesCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "MathHeader", MathHeader); } diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp index b48511287f883..dd0bedd742a40 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp @@ -22,7 +22,6 @@ ProBoundsConstantArrayIndexCheck::ProBoundsConstantArrayIndexCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), GslHeader(Options.get("GslHeader", "")), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void ProBoundsConstantArrayIndexCheck::storeOptions( diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 215ba341f21fd..b90af1521baf5 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -28,6 +28,31 @@ using namespace llvm; namespace clang { namespace tidy { + +template <> struct OptionEnumMapping { + static llvm::ArrayRef> + getEnumMapping() { + static constexpr std::pair + Mapping[] = {{modernize::Confidence::CL_Reasonable, "reasonable"}, + {modernize::Confidence::CL_Safe, "safe"}, + {modernize::Confidence::CL_Risky, "risky"}}; + return makeArrayRef(Mapping); + } +}; + +template <> struct OptionEnumMapping { + static llvm::ArrayRef< + std::pair> + getEnumMapping() { + static constexpr std::pair + Mapping[] = {{modernize::VariableNamer::NS_CamelCase, "CamelCase"}, + 
{modernize::VariableNamer::NS_CamelBack, "camelBack"}, + {modernize::VariableNamer::NS_LowerCase, "lower_case"}, + {modernize::VariableNamer::NS_UpperCase, "UPPER_CASE"}}; + return makeArrayRef(Mapping); + } +}; + namespace modernize { static const char LoopNameArray[] = "forLoopArray"; @@ -44,25 +69,6 @@ static const char EndVarName[] = "endVar"; static const char DerefByValueResultName[] = "derefByValueResult"; static const char DerefByRefResultName[] = "derefByRefResult"; -static ArrayRef> -getConfidenceMapping() { - static constexpr std::pair Mapping[] = { - {"reasonable", Confidence::CL_Reasonable}, - {"safe", Confidence::CL_Safe}, - {"risky", Confidence::CL_Risky}}; - return makeArrayRef(Mapping); -} - -static ArrayRef> -getStyleMapping() { - static constexpr std::pair Mapping[] = - {{"CamelCase", VariableNamer::NS_CamelCase}, - {"camelBack", VariableNamer::NS_CamelBack}, - {"lower_case", VariableNamer::NS_LowerCase}, - {"UPPER_CASE", VariableNamer::NS_UpperCase}}; - return makeArrayRef(Mapping); -} - // shared matchers static const TypeMatcher AnyType() { return anything(); } @@ -474,15 +480,13 @@ LoopConvertCheck::RangeDescriptor::RangeDescriptor() LoopConvertCheck::LoopConvertCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), TUInfo(new TUTrackingInfo), MaxCopySize(Options.get("MaxCopySize", 16ULL)), - MinConfidence(Options.get("MinConfidence", getConfidenceMapping(), - Confidence::CL_Reasonable)), - NamingStyle(Options.get("NamingStyle", getStyleMapping(), - VariableNamer::NS_CamelCase)) {} + MinConfidence(Options.get("MinConfidence", Confidence::CL_Reasonable)), + NamingStyle(Options.get("NamingStyle", VariableNamer::NS_CamelCase)) {} void LoopConvertCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "MaxCopySize", std::to_string(MaxCopySize)); - Options.store(Opts, "MinConfidence", MinConfidence, getConfidenceMapping()); - Options.store(Opts, "NamingStyle", NamingStyle, getStyleMapping()); + 
Options.store(Opts, "MinConfidence", MinConfidence); + Options.store(Opts, "NamingStyle", NamingStyle); } void LoopConvertCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp index e34fd7038bb86..c677043946f7f 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp @@ -45,7 +45,6 @@ MakeSmartPtrCheck::MakeSmartPtrCheck(StringRef Name, ClangTidyContext *Context, StringRef MakeSmartPtrFunctionName) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), MakeSmartPtrFunctionHeader( Options.get("MakeSmartPtrFunctionHeader", StdMemoryHeader)), @@ -54,8 +53,7 @@ MakeSmartPtrCheck::MakeSmartPtrCheck(StringRef Name, ClangTidyContext *Context, IgnoreMacros(Options.getLocalOrGlobal("IgnoreMacros", true)) {} void MakeSmartPtrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "MakeSmartPtrFunctionHeader", MakeSmartPtrFunctionHeader); Options.store(Opts, "MakeSmartPtrFunction", MakeSmartPtrFunctionName); Options.store(Opts, "IgnoreMacros", IgnoreMacros); diff --git a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp index ed1a1a26bb62b..b6dedfbc2b6eb 100644 --- a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp @@ -121,13 +121,11 @@ collectParamDecls(const CXXConstructorDecl *Ctor, PassByValueCheck::PassByValueCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - 
utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), ValuesOnly(Options.get("ValuesOnly", false)) {} void PassByValueCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "ValuesOnly", ValuesOnly); } diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp index 295be200bca6f..f98254dbf7c83 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp @@ -75,12 +75,10 @@ ReplaceAutoPtrCheck::ReplaceAutoPtrCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void ReplaceAutoPtrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } void ReplaceAutoPtrCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp index 9cfbd87239dce..66917df3e91d2 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp @@ -24,7 +24,6 @@ ReplaceRandomShuffleCheck::ReplaceRandomShuffleCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void ReplaceRandomShuffleCheck::registerMatchers(MatchFinder *Finder) { @@ -52,8 +51,7 @@ void 
ReplaceRandomShuffleCheck::registerPPCallbacks( void ReplaceRandomShuffleCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } void ReplaceRandomShuffleCheck::check(const MatchFinder::MatchResult &Result) { diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp index d09673fa7f23f..4cbb014867c4d 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp @@ -24,7 +24,6 @@ MoveConstructorInitCheck::MoveConstructorInitCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void MoveConstructorInitCheck::registerMatchers(MatchFinder *Finder) { @@ -97,8 +96,7 @@ void MoveConstructorInitCheck::registerPPCallbacks( } void MoveConstructorInitCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } } // namespace performance diff --git a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp index d08cec1a2c3ca..597445d0fc266 100644 --- a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp @@ -32,7 +32,6 @@ TypePromotionInMathFnCheck::TypePromotionInMathFnCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), 
utils::IncludeSorter::IS_LLVM)) {} void TypePromotionInMathFnCheck::registerPPCallbacks( @@ -44,8 +43,7 @@ void TypePromotionInMathFnCheck::registerPPCallbacks( void TypePromotionInMathFnCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } void TypePromotionInMathFnCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp index 5b5f2ff994783..5de53b1840f12 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp @@ -69,7 +69,6 @@ UnnecessaryValueParamCheck::UnnecessaryValueParamCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), AllowedTypes( utils::options::parseStringList(Options.get("AllowedTypes", ""))) {} @@ -181,8 +180,7 @@ void UnnecessaryValueParamCheck::registerPPCallbacks( void UnnecessaryValueParamCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "AllowedTypes", utils::options::serializeStringList(AllowedTypes)); } diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index 6e7fcaa4345a5..c885aac89072a 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -26,6 +26,26 @@ using namespace clang::ast_matchers; namespace clang { namespace tidy { 
+ +llvm::ArrayRef< + std::pair> +OptionEnumMapping< + readability::IdentifierNamingCheck::CaseType>::getEnumMapping() { + static constexpr std::pair + Mapping[] = { + {readability::IdentifierNamingCheck::CT_AnyCase, "aNy_CasE"}, + {readability::IdentifierNamingCheck::CT_LowerCase, "lower_case"}, + {readability::IdentifierNamingCheck::CT_UpperCase, "UPPER_CASE"}, + {readability::IdentifierNamingCheck::CT_CamelBack, "camelBack"}, + {readability::IdentifierNamingCheck::CT_CamelCase, "CamelCase"}, + {readability::IdentifierNamingCheck::CT_CamelSnakeCase, + "Camel_Snake_Case"}, + {readability::IdentifierNamingCheck::CT_CamelSnakeBack, + "camel_Snake_Back"}}; + return llvm::makeArrayRef(Mapping); +} + namespace readability { // clang-format off @@ -99,16 +119,6 @@ static StringRef const StyleNames[] = { #undef NAMING_KEYS // clang-format on -static constexpr std::pair - Mapping[] = { - {"aNy_CasE", IdentifierNamingCheck::CT_AnyCase}, - {"lower_case", IdentifierNamingCheck::CT_LowerCase}, - {"UPPER_CASE", IdentifierNamingCheck::CT_UpperCase}, - {"camelBack", IdentifierNamingCheck::CT_CamelBack}, - {"CamelCase", IdentifierNamingCheck::CT_CamelCase}, - {"Camel_Snake_Case", IdentifierNamingCheck::CT_CamelSnakeCase}, - {"camel_Snake_Back", IdentifierNamingCheck::CT_CamelSnakeBack}}; - IdentifierNamingCheck::IdentifierNamingCheck(StringRef Name, ClangTidyContext *Context) : RenamerClangTidyCheck(Name, Context), @@ -117,7 +127,7 @@ IdentifierNamingCheck::IdentifierNamingCheck(StringRef Name, for (auto const &Name : StyleNames) { auto CaseOptional = [&]() -> llvm::Optional { - auto ValueOr = Options.get((Name + "Case").str(), makeArrayRef(Mapping)); + auto ValueOr = Options.get((Name + "Case").str()); if (ValueOr) return *ValueOr; llvm::logAllUnhandledErrors( @@ -148,7 +158,7 @@ void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { if (NamingStyles[i]) { if (NamingStyles[i]->Case) { Options.store(Opts, (StyleNames[i] + "Case").str(), - 
*NamingStyles[i]->Case, llvm::makeArrayRef(Mapping)); + *NamingStyles[i]->Case); } Options.store(Opts, (StyleNames[i] + "Prefix").str(), NamingStyles[i]->Prefix); diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index 04bf53fe16b56..0f6c77b2c9a86 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -75,6 +75,12 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { }; } // namespace readability +template <> +struct OptionEnumMapping { + static llvm::ArrayRef< + std::pair> + getEnumMapping(); +}; } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp index f946b3a1a6f97..c9d018f076e76 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp @@ -175,13 +175,14 @@ Optional IncludeSorter::CreateIncludeInsertion(StringRef FileName, IncludeStmt); } -llvm::ArrayRef> -IncludeSorter::getMapping() { - static constexpr std::pair Mapping[] = - {{"llvm", IS_LLVM}, {"google", IS_Google}}; +} // namespace utils + +llvm::ArrayRef> +OptionEnumMapping::getEnumMapping() { + static constexpr std::pair + Mapping[] = {{utils::IncludeSorter::IS_LLVM, "llvm"}, + {utils::IncludeSorter::IS_Google, "google"}}; return makeArrayRef(Mapping); } - -} // namespace utils } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h index 7dab2cc536a48..1d8997364e5ce 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h @@ -9,7 +9,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H -#include 
"../ClangTidy.h" +#include "../ClangTidyCheck.h" #include namespace clang { @@ -25,8 +25,6 @@ class IncludeSorter { /// Supported include styles. enum IncludeStyle { IS_LLVM = 0, IS_Google = 1 }; - static ArrayRef> getMapping(); - /// The classifications of inclusions, in the order they should be sorted. enum IncludeKinds { IK_MainTUInclude = 0, ///< e.g. ``#include "foo.h"`` when editing foo.cc @@ -66,6 +64,11 @@ class IncludeSorter { }; } // namespace utils + +template <> struct OptionEnumMapping { + static ArrayRef> + getEnumMapping(); +}; } // namespace tidy } // namespace clang #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp index 665fd5140ceb2..03af5dd1565f8 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp @@ -33,7 +33,6 @@ TransformerClangTidyCheck::TransformerClangTidyCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), Rule(MakeRule(getLangOpts(), Options)), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - IncludeSorter::getMapping(), IncludeSorter::IS_LLVM)) { if (Rule) assert(llvm::all_of(Rule->Cases, hasExplanation) && @@ -46,7 +45,6 @@ TransformerClangTidyCheck::TransformerClangTidyCheck(RewriteRule R, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), Rule(std::move(R)), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - IncludeSorter::getMapping(), IncludeSorter::IS_LLVM)) { assert(llvm::all_of(Rule->Cases, hasExplanation) && "clang-tidy checks must have an explanation by default;" @@ -112,8 +110,7 @@ void TransformerClangTidyCheck::check( void TransformerClangTidyCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", 
IncludeStyle); } } // namespace utils diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 9eb06941e4dd3..8db6656e5291a 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -25,8 +25,13 @@ set(LLVM_LINK_COMPONENTS Support AllTargetsInfos FrontendOpenMP + Option ) +if(MSVC AND NOT CLANG_CL) + set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant +endif() + add_clang_library(clangDaemon AST.cpp ClangdLSPServer.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index b0aba886edbe4..0408b0498488e 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -637,6 +637,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, ->insert( {"semanticHighlighting", llvm::json::Object{{"scopes", buildHighlightScopeLookupTable()}}}); + if (ClangdServerOpts.FoldingRanges) + Result.getObject("capabilities")->insert({"foldingRangeProvider", true}); Reply(std::move(Result)); } @@ -929,7 +931,6 @@ void ClangdLSPServer::onDocumentFormatting( static std::vector flattenSymbolHierarchy(llvm::ArrayRef Symbols, const URIForFile &FileURI) { - std::vector Results; std::function Process = [&](const DocumentSymbol &S, llvm::Optional ParentName) { @@ -968,6 +969,12 @@ void ClangdLSPServer::onDocumentSymbol(const DocumentSymbolParams &Params, }); } +void ClangdLSPServer::onFoldingRange( + const FoldingRangeParams &Params, + Callback> Reply) { + Server->foldingRanges(Params.textDocument.uri.file(), std::move(Reply)); +} + static llvm::Optional asCommand(const CodeAction &Action) { Command Cmd; if (Action.command && Action.edit) @@ -1395,6 +1402,8 @@ ClangdLSPServer::ClangdLSPServer( MsgHandler->bind("textDocument/documentLink", &ClangdLSPServer::onDocumentLink); 
MsgHandler->bind("textDocument/semanticTokens/full", &ClangdLSPServer::onSemanticTokens); MsgHandler->bind("textDocument/semanticTokens/full/delta", &ClangdLSPServer::onSemanticTokensDelta); + if (Opts.FoldingRanges) + MsgHandler->bind("textDocument/foldingRange", &ClangdLSPServer::onFoldingRange); // clang-format on } diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index a779e9036c4a8..d0c0e814c6418 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -87,6 +87,8 @@ class ClangdLSPServer : private ClangdServer::Callbacks { // otherwise. void onDocumentSymbol(const DocumentSymbolParams &, Callback); + void onFoldingRange(const FoldingRangeParams &, + Callback>); void onCodeAction(const CodeActionParams &, Callback); void onCompletion(const CompletionParams &, Callback); void onSignatureHelp(const TextDocumentPositionParams &, diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 5d99104dadaf6..ec48556595010 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -674,6 +675,18 @@ void ClangdServer::documentSymbols(llvm::StringRef File, TUScheduler::InvalidateOnUpdate); } +void ClangdServer::foldingRanges(llvm::StringRef File, + Callback> CB) { + auto Action = + [CB = std::move(CB)](llvm::Expected InpAST) mutable { + if (!InpAST) + return CB(InpAST.takeError()); + CB(clangd::getFoldingRanges(InpAST->AST)); + }; + WorkScheduler.runWithAST("foldingRanges", File, std::move(Action), + TUScheduler::InvalidateOnUpdate); +} + void ClangdServer::findReferences(PathRef File, Position Pos, uint32_t Limit, Callback CB) { auto Action = [Pos, Limit, CB = std::move(CB), @@ -750,6 +763,9 @@ Context 
ClangdServer::createProcessingContext(PathRef File) const { return Context::current().clone(); config::Params Params; + // Don't reread config files excessively often. + // FIXME: when we see a config file change event, use the event timestamp. + Params.FreshTime = std::chrono::steady_clock::now() - std::chrono::seconds(5); llvm::SmallString<256> PosixPath; if (!File.empty()) { assert(llvm::sys::path::is_absolute(File)); diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index ea82081f24405..3529e5050aa38 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -157,6 +157,9 @@ class ClangdServer { /// Enable notification-based semantic highlighting. bool TheiaSemanticHighlighting = false; + /// Enable preview of FoldingRanges feature. + bool FoldingRanges = false; + /// Returns true if the tweak should be enabled. std::function TweakFilter = [](const Tweak &T) { return !T.hidden(); // only enable non-hidden tweaks. @@ -246,6 +249,9 @@ class ClangdServer { void documentSymbols(StringRef File, Callback> CB); + /// Retrieve ranges that can be used to fold code within the specified file. + void foldingRanges(StringRef File, Callback> CB); + /// Retrieve locations for symbol references. 
void findReferences(PathRef File, Position Pos, uint32_t Limit, Callback CB); diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 0b27e0e3e8284..f6210a43b34eb 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -9,8 +9,12 @@ #include "CompileCommands.h" #include "Config.h" #include "support/Logger.h" +#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Tooling/ArgumentsAdjusters.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/MemoryBuffer.h" @@ -183,8 +187,7 @@ CommandMangler CommandMangler::forTests() { } void CommandMangler::adjust(std::vector &Cmd) const { - // FIXME: remove const_cast once unique_function is const-compatible. - for (auto &Edit : const_cast(Config::current()).CompileFlags.Edits) + for (auto &Edit : Config::current().CompileFlags.Edits) Edit(Cmd); // Check whether the flag exists, either as -flag or -flag=* @@ -235,5 +238,270 @@ CommandMangler::operator clang::tooling::ArgumentsAdjuster() && { }; } +// ArgStripper implementation +namespace { + +// Determine total number of args consumed by this option. +// Return answers for {Exact, Prefix} match. 0 means not allowed. +std::pair getArgCount(const llvm::opt::Option &Opt) { + constexpr static unsigned Rest = 10000; // Should be all the rest! 
+ // Reference is llvm::opt::Option::acceptInternal() + using llvm::opt::Option; + switch (Opt.getKind()) { + case Option::FlagClass: + return {1, 0}; + case Option::JoinedClass: + case Option::CommaJoinedClass: + return {1, 1}; + case Option::GroupClass: + case Option::InputClass: + case Option::UnknownClass: + case Option::ValuesClass: + return {1, 0}; + case Option::JoinedAndSeparateClass: + return {2, 2}; + case Option::SeparateClass: + return {2, 0}; + case Option::MultiArgClass: + return {1 + Opt.getNumArgs(), 0}; + case Option::JoinedOrSeparateClass: + return {2, 1}; + case Option::RemainingArgsClass: + return {Rest, 0}; + case Option::RemainingArgsJoinedClass: + return {Rest, Rest}; + } + llvm_unreachable("Unhandled option kind"); +} + +// Flag-parsing mode, which affects which flags are available. +enum DriverMode : unsigned char { + DM_None = 0, + DM_GCC = 1, // Default mode e.g. when invoked as 'clang' + DM_CL = 2, // MS CL.exe compatible mode e.g. when invoked as 'clang-cl' + DM_CC1 = 4, // When invoked as 'clang -cc1' or after '-Xclang' + DM_All = 7 +}; + +// Examine args list to determine if we're in GCC, CL-compatible, or cc1 mode. +DriverMode getDriverMode(const std::vector &Args) { + DriverMode Mode = DM_GCC; + llvm::StringRef Argv0 = Args.front(); + if (Argv0.endswith_lower(".exe")) + Argv0 = Argv0.drop_back(strlen(".exe")); + if (Argv0.endswith_lower("cl")) + Mode = DM_CL; + for (const llvm::StringRef Arg : Args) { + if (Arg == "--driver-mode=cl") { + Mode = DM_CL; + break; + } + if (Arg == "-cc1") { + Mode = DM_CC1; + break; + } + } + return Mode; +} + +// Returns the set of DriverModes where an option may be used. +unsigned char getModes(const llvm::opt::Option &Opt) { + // Why is this so complicated?! 
+ // Reference is clang::driver::Driver::getIncludeExcludeOptionFlagMasks() + unsigned char Result = DM_None; + if (Opt.hasFlag(driver::options::CC1Option)) + Result |= DM_CC1; + if (!Opt.hasFlag(driver::options::NoDriverOption)) { + if (Opt.hasFlag(driver::options::CLOption)) { + Result |= DM_CL; + } else { + Result |= DM_GCC; + if (Opt.hasFlag(driver::options::CoreOption)) { + Result |= DM_CL; + } + } + } + return Result; +} + +} // namespace + +llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { + // All the hard work is done once in a static initializer. + // We compute a table containing strings to look for and #args to skip. + // e.g. "-x" => {-x 2 args, -x* 1 arg, --language 2 args, --language=* 1 arg} + using TableTy = + llvm::StringMap, llvm::BumpPtrAllocator>; + static TableTy *Table = [] { + auto &DriverTable = driver::getDriverOptTable(); + using DriverID = clang::driver::options::ID; + + // Collect sets of aliases, so we can treat -foo and -foo= as synonyms. + // Conceptually a double-linked list: PrevAlias[I] -> I -> NextAlias[I]. + // If PrevAlias[I] is INVALID, then I is canonical. + DriverID PrevAlias[DriverID::LastOption] = {DriverID::OPT_INVALID}; + DriverID NextAlias[DriverID::LastOption] = {DriverID::OPT_INVALID}; + auto AddAlias = [&](DriverID Self, DriverID T) { + if (NextAlias[T]) { + PrevAlias[NextAlias[T]] = Self; + NextAlias[Self] = NextAlias[T]; + } + PrevAlias[Self] = T; + NextAlias[T] = Self; + }; + // Also grab prefixes for each option, these are not fully exposed. 
+ const char *const *Prefixes[DriverID::LastOption] = {nullptr}; +#define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE; +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, METAVAR, VALUES) \ + if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr) \ + AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS); \ + Prefixes[DriverID::OPT_##ID] = PREFIX; +#include "clang/Driver/Options.inc" +#undef OPTION +#undef PREFIX + + auto Result = std::make_unique(); + // Iterate over distinct options (represented by the canonical alias). + // Every spelling of this option will get the same set of rules. + for (unsigned ID = 1 /*Skip INVALID */; ID < DriverID::LastOption; ++ID) { + if (PrevAlias[ID] || ID == DriverID::OPT_Xclang) + continue; // Not canonical, or specially handled. + llvm::SmallVector Rules; + // Iterate over each alias, to add rules for parsing it. + for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) { + if (Prefixes[A] == nullptr) // option groups. + continue; + auto Opt = DriverTable.getOption(A); + // Exclude - and -foo pseudo-options. + if (Opt.getName().empty()) + continue; + auto Modes = getModes(Opt); + std::pair ArgCount = getArgCount(Opt); + // Iterate over each spelling of the alias, e.g. -foo vs --foo. + for (auto *Prefix = Prefixes[A]; *Prefix != nullptr; ++Prefix) { + llvm::SmallString<64> Buf(*Prefix); + Buf.append(Opt.getName()); + llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey(); + Rules.emplace_back(); + Rule &R = Rules.back(); + R.Text = Spelling; + R.Modes = Modes; + R.ExactArgs = ArgCount.first; + R.PrefixArgs = ArgCount.second; + // Concrete priority is the index into the option table. + // Effectively, earlier entries take priority over later ones. + assert(ID < std::numeric_limits::max() && + "Rules::Priority overflowed by options table"); + R.Priority = ID; + } + } + // Register the set of rules under each possible name. 
+ for (const auto &R : Rules) + Result->find(R.Text)->second.append(Rules.begin(), Rules.end()); + } +#ifndef NDEBUG + // Dump the table and various measures of its size. + unsigned RuleCount = 0; + dlog("ArgStripper Option spelling table"); + for (const auto &Entry : *Result) { + dlog("{0}", Entry.first()); + RuleCount += Entry.second.size(); + for (const auto &R : Entry.second) + dlog(" {0} #={1} *={2} Mode={3}", R.Text, R.ExactArgs, R.PrefixArgs, + int(R.Modes)); + } + dlog("Table spellings={0} rules={1} string-bytes={2}", Result->size(), + RuleCount, Result->getAllocator().getBytesAllocated()); +#endif + // The static table will never be destroyed. + return Result.release(); + }(); + + auto It = Table->find(Arg); + return (It == Table->end()) ? llvm::ArrayRef() : It->second; +} + +void ArgStripper::strip(llvm::StringRef Arg) { + auto OptionRules = rulesFor(Arg); + if (OptionRules.empty()) { + // Not a recognized flag. Strip it literally. + Storage.emplace_back(Arg); + Rules.emplace_back(); + Rules.back().Text = Storage.back(); + Rules.back().ExactArgs = 1; + if (Rules.back().Text.consume_back("*")) + Rules.back().PrefixArgs = 1; + Rules.back().Modes = DM_All; + Rules.back().Priority = -1; // Max unsigned = lowest priority. + } else { + Rules.append(OptionRules.begin(), OptionRules.end()); + } +} + +const ArgStripper::Rule *ArgStripper::matchingRule(llvm::StringRef Arg, + unsigned Mode, + unsigned &ArgCount) const { + const ArgStripper::Rule *BestRule = nullptr; + for (const Rule &R : Rules) { + // Rule can fail to match if... + if (!(R.Modes & Mode)) + continue; // not applicable to current driver mode + if (BestRule && BestRule->Priority < R.Priority) + continue; // lower-priority than best candidate. + if (!Arg.startswith(R.Text)) + continue; // current arg doesn't match the prefix string + bool PrefixMatch = Arg.size() > R.Text.size(); + // Can rule apply as an exact/prefix match? + if (unsigned Count = PrefixMatch ? 
R.PrefixArgs : R.ExactArgs) { + BestRule = &R; + ArgCount = Count; + } + // Continue in case we find a higher-priority rule. + } + return BestRule; +} + +void ArgStripper::process(std::vector &Args) const { + if (Args.empty()) + return; + + // We're parsing the args list in some mode (e.g. gcc-compatible) but may + // temporarily switch to another mode with the -Xclang flag. + DriverMode MainMode = getDriverMode(Args); + DriverMode CurrentMode = MainMode; + + // Read and write heads for in-place deletion. + unsigned Read = 0, Write = 0; + bool WasXclang = false; + while (Read < Args.size()) { + unsigned ArgCount = 0; + if (matchingRule(Args[Read], CurrentMode, ArgCount)) { + // Delete it and its args. + if (WasXclang) { + assert(Write > 0); + --Write; // Drop previous -Xclang arg + CurrentMode = MainMode; + WasXclang = false; + } + // Advance to last arg. An arg may be foo or -Xclang foo. + for (unsigned I = 1; Read < Args.size() && I < ArgCount; ++I) { + ++Read; + if (Read < Args.size() && Args[Read] == "-Xclang") + ++Read; + } + } else { + // No match, just copy the arg through. + WasXclang = Args[Read] == "-Xclang"; + CurrentMode = WasXclang ? DM_CC1 : MainMode; + if (Write != Read) + Args[Write] = std::move(Args[Read]); + ++Write; + } + ++Read; + } + Args.resize(Write); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/CompileCommands.h b/clang-tools-extra/clangd/CompileCommands.h index 51a5574d13d3a..3efd80026cf6b 100644 --- a/clang-tools-extra/clangd/CompileCommands.h +++ b/clang-tools-extra/clangd/CompileCommands.h @@ -12,6 +12,7 @@ #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringMap.h" +#include #include #include @@ -50,6 +51,51 @@ struct CommandMangler { Memoize> ResolvedDriversNoFollow; }; +// Removes args from a command-line in a semantically-aware way. +// +// Internally this builds a large (0.5MB) table of clang options on first use. 
+// Both strip() and process() are fairly cheap after that. +// +// FIXME: this reimplements much of OptTable, it might be nice to expose more. +// The table-building strategy may not make sense outside clangd. +class ArgStripper { +public: + ArgStripper() = default; + ArgStripper(ArgStripper &&) = default; + ArgStripper(const ArgStripper &) = delete; + ArgStripper &operator=(ArgStripper &&) = default; + ArgStripper &operator=(const ArgStripper &) = delete; + + // Adds the arg to the set which should be removed. + // + // Recognized clang flags are stripped semantically. When "-I" is stripped: + // - so is its value (either as -Ifoo or -I foo) + // - aliases like --include-directory=foo are also stripped + // - CL-style /Ifoo will be removed if the args indicate MS-compatible mode + // Compile args not recognized as flags are removed literally, except: + // - strip("ABC*") will remove any arg with an ABC prefix. + // + // In either case, the -Xclang prefix will be dropped if present. + void strip(llvm::StringRef Arg); + // Remove the targets from a compile command, in-place. + void process(std::vector &Args) const; + +private: + // Deletion rules, to be checked for each arg. + struct Rule { + llvm::StringRef Text; // Rule applies only if arg begins with Text. + unsigned char Modes = 0; // Rule applies only in specified driver modes. + uint16_t Priority = 0; // Lower is better. + uint16_t ExactArgs = 0; // Num args consumed when Arg == Text. + uint16_t PrefixArgs = 0; // Num args consumed when Arg starts with Text. + }; + static llvm::ArrayRef rulesFor(llvm::StringRef Arg); + const Rule *matchingRule(llvm::StringRef Arg, unsigned Mode, + unsigned &ArgCount) const; + llvm::SmallVector Rules; + std::deque Storage; // Store strings not found in option table. 
+}; + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h index 878c9e8549b52..f8dc2df51814a 100644 --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -52,9 +52,16 @@ struct Config { /// Controls how the compile command for the current file is determined. struct { // Edits to apply to the compile command, in sequence. - // FIXME: these functions need to be const-callable. For now, const_cast. - std::vector &)>> Edits; + std::vector &) const>> + Edits; } CompileFlags; + + enum class BackgroundPolicy { Build, Skip }; + /// Controls background-index behavior. + struct { + /// Whether this TU should be indexed. + BackgroundPolicy Background = BackgroundPolicy::Build; + } Index; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index 04c0df88bbf76..9b8a48fdaf7b0 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -23,11 +23,14 @@ // //===----------------------------------------------------------------------===// +#include "CompileCommands.h" #include "Config.h" #include "ConfigFragment.h" #include "support/Logger.h" #include "support/Trace.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" @@ -79,9 +82,56 @@ struct FragmentCompiler { return Result; } + // Helper with similar API to StringSwitch, for parsing enum values. 
+ template class EnumSwitch { + FragmentCompiler &Outer; + llvm::StringRef EnumName; + const Located &Input; + llvm::Optional Result; + llvm::SmallVector ValidValues; + + public: + EnumSwitch(llvm::StringRef EnumName, const Located &In, + FragmentCompiler &Outer) + : Outer(Outer), EnumName(EnumName), Input(In) {} + + EnumSwitch &map(llvm::StringLiteral Name, T Value) { + assert(!llvm::is_contained(ValidValues, Name) && "Duplicate value!"); + ValidValues.push_back(Name); + if (!Result && *Input == Name) + Result = Value; + return *this; + } + + llvm::Optional value() { + if (!Result) + Outer.diag( + Warning, + llvm::formatv("Invalid {0} value '{1}'. Valid values are {2}.", + EnumName, *Input, llvm::join(ValidValues, ", ")) + .str(), + Input.Range); + return Result; + }; + }; + + // Attempt to parse a specified string into an enum. + // Yields llvm::None and produces a diagnostic on failure. + // + // Optional Value = compileEnum("Foo", Frag.Foo) + // .map("Foo", Enum::Foo) + // .map("Bar", Enum::Bar) + // .value(); + template + EnumSwitch compileEnum(llvm::StringRef EnumName, + const Located &In) { + return EnumSwitch(EnumName, In, *this); + } + void compile(Fragment &&F) { compile(std::move(F.If)); compile(std::move(F.CompileFlags)); + compile(std::move(F.Index)); } void compile(Fragment::IfBlock &&F) { @@ -122,6 +172,19 @@ struct FragmentCompiler { } void compile(Fragment::CompileFlagsBlock &&F) { + if (!F.Remove.empty()) { + auto Remove = std::make_shared(); + for (auto &A : F.Remove) + Remove->strip(*A); + Out.Apply.push_back([Remove(std::shared_ptr( + std::move(Remove)))](Config &C) { + C.CompileFlags.Edits.push_back( + [Remove](std::vector &Args) { + Remove->process(Args); + }); + }); + } + if (!F.Add.empty()) { std::vector Add; for (auto &A : F.Add) @@ -134,7 +197,20 @@ struct FragmentCompiler { } } + void compile(Fragment::IndexBlock &&F) { + if (F.Background) { + if (auto Val = compileEnum("Background", + **F.Background) + .map("Build", 
Config::BackgroundPolicy::Build) + .map("Skip", Config::BackgroundPolicy::Skip) + .value()) + Out.Apply.push_back([Val](Config &C) { C.Index.Background = *Val; }); + } + } + constexpr static llvm::SourceMgr::DiagKind Error = llvm::SourceMgr::DK_Error; + constexpr static llvm::SourceMgr::DiagKind Warning = + llvm::SourceMgr::DK_Warning; void diag(llvm::SourceMgr::DiagKind Kind, llvm::StringRef Message, llvm::SMRange Range) { if (Range.isValid() && SourceMgr != nullptr) diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index 42f9ec2edc724..330f157326f21 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -117,9 +117,50 @@ struct Fragment { }; IfBlock If; + /// Conditions in the CompileFlags block affect how a file is parsed. + /// + /// clangd emulates how clang would interpret a file. + /// By default, it behaves roughly like `clang $FILENAME`, but real projects + /// usually require setting the include path (with the `-I` flag), defining + /// preprocessor symbols, configuring warnings etc. + /// Often, a compilation database specifies these compile commands. clangd + /// searches for compile_commands.json in parents of the source file. + /// + /// This section modifies how the compile command is constructed. struct CompileFlagsBlock { + /// List of flags to append to the compile command. std::vector> Add; - } CompileFlags; + /// List of flags to remove from the compile command. + /// + /// - If the value is a recognized clang flag (like "-I") then it will be + /// removed along with any arguments. Synonyms like --include-dir= will + /// also be removed. + /// - Otherwise, if the value ends in * (like "-DFOO=*") then any argument + /// with the prefix will be removed. + /// - Otherwise any argument exactly matching the value is removed. + /// + /// In all cases, -Xclang is also removed where needed. 
+ /// + /// Example: + /// Command: clang++ --include-directory=/usr/include -DFOO=42 foo.cc + /// Remove: [-I, -DFOO=*] + /// Result: clang++ foo.cc + /// + /// Flags added by the same CompileFlags entry will not be removed. + std::vector> Remove; + }; + CompileFlagsBlock CompileFlags; + + /// Controls how clangd understands code outside the current file. + /// clangd's indexes provide information about symbols that isn't available + /// to clang's parser, such as incoming references. + struct IndexBlock { + /// Whether files are built in the background to produce a project index. + /// This is checked for translation units only, not headers they include. + /// Legal values are "Build" or "Skip". + llvm::Optional> Background; + }; + IndexBlock Index; }; } // namespace config diff --git a/clang-tools-extra/clangd/ConfigProvider.cpp b/clang-tools-extra/clangd/ConfigProvider.cpp index 4b466d53e2930..a56cdd755322a 100644 --- a/clang-tools-extra/clangd/ConfigProvider.cpp +++ b/clang-tools-extra/clangd/ConfigProvider.cpp @@ -11,8 +11,10 @@ #include "ConfigFragment.h" #include "support/ThreadsafeFS.h" #include "support/Trace.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Path.h" +#include #include namespace clang { @@ -22,21 +24,18 @@ namespace config { // Threadsafe cache around reading a YAML config file from disk. class FileConfigCache { std::mutex Mu; + std::chrono::steady_clock::time_point ValidTime = {}; llvm::SmallVector CachedValue; llvm::sys::TimePoint<> MTime = {}; unsigned Size = -1; - void updateCacheLocked(const llvm::vfs::Status &Stat, - llvm::vfs::FileSystem &FS, DiagnosticCallback DC) { - if (Size == Stat.getSize() && MTime == Stat.getLastModificationTime()) - return; // Already valid. - - Size = Stat.getSize(); - MTime = Stat.getLastModificationTime(); + // Called once we are sure we want to read the file. + // REQUIRES: Cache keys are set. Mutex must be held. 
+ void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) { CachedValue.clear(); auto Buf = FS.getBufferForFile(Path); - // If stat() succeeds but we failed to read, don't cache failure. + // If we failed to read (but stat succeeded), don't cache failure. if (!Buf) { Size = -1; MTime = {}; @@ -68,19 +67,40 @@ class FileConfigCache { // - allow caches to be reused based on short elapsed walltime // - allow latency-sensitive operations to skip revalidating the cache void read(const ThreadsafeFS &TFS, DiagnosticCallback DC, + llvm::Optional FreshTime, std::vector &Out) { + std::lock_guard Lock(Mu); + // We're going to update the cache and return whatever's in it. + auto Return = llvm::make_scope_exit( + [&] { llvm::copy(CachedValue, std::back_inserter(Out)); }); + + // Return any sufficiently recent result without doing any further work. + if (FreshTime && ValidTime >= FreshTime) + return; + + // Ensure we bump the ValidTime at the end to allow for reuse. + auto MarkTime = llvm::make_scope_exit( + [&] { ValidTime = std::chrono::steady_clock::now(); }); + + // Stat is cheaper than opening the file, it's usually unchanged. assert(llvm::sys::path::is_absolute(Path)); auto FS = TFS.view(/*CWD=*/llvm::None); auto Stat = FS->status(Path); + // If there's no file, the result is empty. Ensure we have an invalid key. if (!Stat || !Stat->isRegularFile()) { - // No point taking the lock to clear the cache. We know what to return. - // If the file comes back we'll invalidate the cache at that point. + MTime = {}; + Size = -1; + CachedValue.clear(); return; } + // If the modified-time and size match, assume the content does too. + if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime()) + return; - std::lock_guard Lock(Mu); - updateCacheLocked(*Stat, *FS, DC); - llvm::copy(CachedValue, std::back_inserter(Out)); + // OK, the file has actually changed. Update cache key, compute new value. 
+ Size = Stat->getSize(); + MTime = Stat->getLastModificationTime(); + fillCacheFromDisk(*FS, DC); } }; @@ -93,7 +113,7 @@ std::unique_ptr Provider::fromYAMLFile(llvm::StringRef AbsPath, std::vector getFragments(const Params &P, DiagnosticCallback DC) const override { std::vector Result; - Cache.read(FS, DC, Result); + Cache.read(FS, DC, P.FreshTime, Result); return Result; }; @@ -158,7 +178,7 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, // This will take a (per-file) lock for each file that actually exists. std::vector Result; for (FileConfigCache *Cache : Caches) - Cache->read(FS, DC, Result); + Cache->read(FS, DC, P.FreshTime, Result); return Result; }; @@ -173,9 +193,9 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, } std::unique_ptr -Provider::combine(std::vector> Providers) { +Provider::combine(std::vector Providers) { struct CombinedProvider : Provider { - std::vector> Providers; + std::vector Providers; std::vector getFragments(const Params &P, DiagnosticCallback DC) const override { @@ -189,7 +209,11 @@ Provider::combine(std::vector> Providers) { }; auto Result = std::make_unique(); Result->Providers = std::move(Providers); - return Result; + // FIXME: This is a workaround for a bug in older versions of clang (< 3.9) + // The constructor that is supposed to allow for Derived to Base + // conversion does not work. Remove this if we drop support for such + // configurations. 
+ return std::unique_ptr(Result.release()); } Config Provider::getConfig(const Params &P, DiagnosticCallback DC) const { diff --git a/clang-tools-extra/clangd/ConfigProvider.h b/clang-tools-extra/clangd/ConfigProvider.h index a773e56b3bd72..1ef33c79c1e86 100644 --- a/clang-tools-extra/clangd/ConfigProvider.h +++ b/clang-tools-extra/clangd/ConfigProvider.h @@ -20,6 +20,7 @@ #include "llvm/ADT/FunctionExtras.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include #include #include @@ -34,6 +35,10 @@ struct Params { /// Absolute path to a source file we're applying the config to. Unix slashes. /// Empty if not configuring a particular file. llvm::StringRef Path; + /// Hint that stale data is OK to improve performance (e.g. avoid IO). + /// FreshTime sets a bound for how old the data can be. + /// If not set, providers should validate caches against the data source. + llvm::Optional FreshTime; }; /// Used to report problems in parsing or interpreting a config. @@ -71,8 +76,7 @@ class Provider { /// A provider that includes fragments from all the supplied providers. /// Order is preserved; later providers take precedence over earlier ones. - static std::unique_ptr - combine(std::vector>); + static std::unique_ptr combine(std::vector); /// Build a config based on this provider. 
Config getConfig(const Params &, DiagnosticCallback) const; diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index ef6003b024392..16639f6649c2b 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "ConfigFragment.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" @@ -63,6 +64,17 @@ class Parser { if (auto Values = scalarValues(N)) F.Add = std::move(*Values); }); + Dict.handle("Remove", [&](Node &N) { + if (auto Values = scalarValues(N)) + F.Remove = std::move(*Values); + }); + Dict.parse(N); + } + + void parse(Fragment::IndexBlock &F, Node &N) { + DictParser Dict("Index", this); + Dict.handle("Background", + [&](Node &N) { F.Background = scalarValue(N, "Background"); }); Dict.parse(N); } diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp index 58e2ee1e21c77..f5d6a95aa713d 100644 --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -136,17 +136,11 @@ llvm::Optional declToSym(ASTContext &Ctx, const NamedDecl &ND) { auto &SM = Ctx.getSourceManager(); SourceLocation NameLoc = nameLocation(ND, SM); - // getFileLoc is a good choice for us, but we also need to make sure - // sourceLocToPosition won't switch files, so we call getSpellingLoc on top of - // that to make sure it does not switch files. - // FIXME: sourceLocToPosition should not switch files! 
SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc())); SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc())); - if (NameLoc.isInvalid() || BeginLoc.isInvalid() || EndLoc.isInvalid()) - return llvm::None; - - if (!SM.isWrittenInMainFile(NameLoc) || !SM.isWrittenInMainFile(BeginLoc) || - !SM.isWrittenInMainFile(EndLoc)) + const auto SymbolRange = + toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc}); + if (!SymbolRange) return llvm::None; Position NameBegin = sourceLocToPosition(SM, NameLoc); @@ -162,8 +156,8 @@ llvm::Optional declToSym(ASTContext &Ctx, const NamedDecl &ND) { SI.name = printName(Ctx, ND); SI.kind = SK; SI.deprecated = ND.isDeprecated(); - SI.range = - Range{sourceLocToPosition(SM, BeginLoc), sourceLocToPosition(SM, EndLoc)}; + SI.range = Range{sourceLocToPosition(SM, SymbolRange->getBegin()), + sourceLocToPosition(SM, SymbolRange->getEnd())}; SI.selectionRange = Range{NameBegin, NameEnd}; if (!SI.range.contains(SI.selectionRange)) { // 'selectionRange' must be contained in 'range', so in cases where clang diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index 5e75864ec8d44..23e8c9fe716d0 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -298,15 +298,11 @@ void OverlayCDB::setCompileCommand( } llvm::Optional OverlayCDB::getProjectInfo(PathRef File) const { - { - std::lock_guard Lock(Mutex); - auto It = Commands.find(removeDots(File)); - if (It != Commands.end()) - return ProjectInfo{}; - } + // It wouldn't make much sense to treat files with overridden commands + // specially when we can't do the same for the (unknown) local headers they + // include or changing behavior mid-air after receiving an override. 
if (Base) return Base->getProjectInfo(File); - return llvm::None; } } // namespace clangd diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index e9a5417d9d69b..95677f9f8c19a 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -119,7 +119,6 @@ std::unique_ptr getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, std::unique_ptr Base); - /// Wraps another compilation database, and supports overriding the commands /// using an in-memory mapping. class OverlayCDB : public GlobalCompilationDatabase { @@ -134,6 +133,8 @@ class OverlayCDB : public GlobalCompilationDatabase { llvm::Optional getCompileCommand(PathRef File) const override; tooling::CompileCommand getFallbackCommand(PathRef File) const override; + /// Project info is gathered purely from the inner compilation database to + /// ensure consistency. llvm::Optional getProjectInfo(PathRef File) const override; /// Sets or clears the compilation command for a particular file. 
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index 2396037157853..b5dbee54f59db 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -1241,5 +1241,24 @@ llvm::json::Value toJSON(const DocumentLink &DocumentLink) { }; } +bool fromJSON(const llvm::json::Value &Params, FoldingRangeParams &R) { + llvm::json::ObjectMapper O(Params); + return O && O.map("textDocument", R.textDocument); +} + +llvm::json::Value toJSON(const FoldingRange &Range) { + llvm::json::Object Result{ + {"startLine", Range.startLine}, + {"endLine", Range.endLine}, + }; + if (Range.startCharacter) + Result["startCharacter"] = Range.startCharacter; + if (Range.endCharacter) + Result["endCharacter"] = Range.endCharacter; + if (Range.kind) + Result["kind"] = *Range.kind; + return Result; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 77d402a6a9ba1..2bb23e5ddd94e 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -1510,6 +1510,23 @@ struct DocumentLink { }; llvm::json::Value toJSON(const DocumentLink &DocumentLink); +// FIXME(kirillbobyrev): Add FoldingRangeClientCapabilities so we can support +// per-line-folding editors. +struct FoldingRangeParams { + TextDocumentIdentifier textDocument; +}; +bool fromJSON(const llvm::json::Value &, FoldingRangeParams &); + +/// Stores information about a region of code that can be folded. 
+struct FoldingRange { + unsigned startLine = 0; + unsigned startCharacter; + unsigned endLine = 0; + unsigned endCharacter; + llvm::Optional kind; +}; +llvm::json::Value toJSON(const FoldingRange &Range); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index 1db15ba6699a8..e94a3ca5a0c38 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -41,10 +41,13 @@ using ast_type_traits::DynTypedNode; void recordMetrics(const SelectionTree &S) { static constexpr trace::Metric SelectionUsedRecovery( "selection_recovery", trace::Metric::Distribution); + static constexpr trace::Metric RecoveryType("selection_recovery_type", + trace::Metric::Distribution); const auto *Common = S.commonAncestor(); for (const auto *N = Common; N; N = N->Parent) { - if (N->ASTNode.get()) { + if (const auto *RE = N->ASTNode.get()) { SelectionUsedRecovery.record(1); // used recovery ast. + RecoveryType.record(RE->isTypeDependent() ? 0 : 1); return; } } diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp index a6b1ebfb83275..cfce1520cd082 100644 --- a/clang-tools-extra/clangd/SemanticSelection.cpp +++ b/clang-tools-extra/clangd/SemanticSelection.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include "SemanticSelection.h" +#include "FindSymbols.h" #include "ParsedAST.h" #include "Protocol.h" #include "Selection.h" @@ -18,6 +19,7 @@ namespace clang { namespace clangd { namespace { + // Adds Range \p R to the Result if it is distinct from the last added Range. // Assumes that only consecutive ranges can coincide. void addIfDistinct(const Range &R, std::vector &Result) { @@ -25,6 +27,20 @@ void addIfDistinct(const Range &R, std::vector &Result) { Result.push_back(R); } } + +// Recursively collects FoldingRange from a symbol and its children. 
+void collectFoldingRanges(DocumentSymbol Symbol, + std::vector &Result) { + FoldingRange Range; + Range.startLine = Symbol.range.start.line; + Range.startCharacter = Symbol.range.start.character; + Range.endLine = Symbol.range.end.line; + Range.endCharacter = Symbol.range.end.character; + Result.push_back(Range); + for (const auto &Child : Symbol.children) + collectFoldingRanges(Child, Result); +} + } // namespace llvm::Expected getSemanticRanges(ParsedAST &AST, Position Pos) { @@ -81,5 +97,24 @@ llvm::Expected getSemanticRanges(ParsedAST &AST, Position Pos) { return std::move(Head); } +// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and +// other code regions (e.g. public/private/protected sections of classes, +// control flow statement bodies). +// Related issue: +// https://github.com/clangd/clangd/issues/310 +llvm::Expected> getFoldingRanges(ParsedAST &AST) { + // FIXME(kirillbobyrev): getDocumentSymbols() is conveniently available but + // limited (e.g. doesn't yield blocks inside functions and provides ranges for + // nodes themselves instead of their contents which is less useful). Replace + // this with a more general RecursiveASTVisitor implementation instead. + auto DocumentSymbols = getDocumentSymbols(AST); + if (!DocumentSymbols) + return DocumentSymbols.takeError(); + std::vector Result; + for (const auto &Symbol : *DocumentSymbols) + collectFoldingRanges(Symbol, Result); + return Result; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/SemanticSelection.h b/clang-tools-extra/clangd/SemanticSelection.h index 810cc21d9a58f..2fe37871ec680 100644 --- a/clang-tools-extra/clangd/SemanticSelection.h +++ b/clang-tools-extra/clangd/SemanticSelection.h @@ -25,6 +25,10 @@ namespace clangd { /// If pos is not in any interesting range, return [Pos, Pos). llvm::Expected getSemanticRanges(ParsedAST &AST, Position Pos); +/// Returns a list of ranges whose contents might be collapsible in an editor. 
+/// This should include large scopes, preprocessor blocks etc. +llvm::Expected> getFoldingRanges(ParsedAST &AST); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 5454b1c92c8a8..ed367005177b2 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -965,6 +965,7 @@ void ASTWorker::startTask(llvm::StringRef Name, if (RunSync) { assert(!Done && "running a task after stop()"); trace::Span Tracer(Name + ":" + llvm::sys::path::filename(FileName)); + WithContext WithProvidedContext(ContextProvider(FileName)); Task(); return; } @@ -1062,9 +1063,7 @@ void ASTWorker::run() { Status.ASTActivity.K = ASTAction::RunningAction; Status.ASTActivity.Name = CurrentRequest->Name; }); - llvm::Optional WithProvidedContext; - if (ContextProvider) - WithProvidedContext.emplace(ContextProvider(FileName)); + WithContext WithProvidedContext(ContextProvider(FileName)); CurrentRequest->Action(); } @@ -1238,6 +1237,12 @@ TUScheduler::TUScheduler(const GlobalCompilationDatabase &CDB, Barrier(Opts.AsyncThreadsCount), IdleASTs( std::make_unique(Opts.RetentionPolicy.MaxRetainedASTs)) { + // Avoid null checks everywhere. 
+ if (!Opts.ContextProvider) { + this->Opts.ContextProvider = [](llvm::StringRef) { + return Context::current().clone(); + }; + } if (0 < Opts.AsyncThreadsCount) { PreambleTasks.emplace(); WorkerThreads.emplace(); @@ -1300,16 +1305,16 @@ llvm::StringMap TUScheduler::getAllFileContents() const { void TUScheduler::run(llvm::StringRef Name, llvm::StringRef Path, llvm::unique_function Action) { - if (!PreambleTasks) + if (!PreambleTasks) { + WithContext WithProvidedContext(Opts.ContextProvider(Path)); return Action(); + } PreambleTasks->runAsync(Name, [this, Ctx = Context::current().clone(), Path(Path.str()), Action = std::move(Action)]() mutable { std::lock_guard BarrierLock(Barrier); WithContext WC(std::move(Ctx)); - llvm::Optional WithProvidedContext; - if (Opts.ContextProvider) - WithProvidedContext.emplace(Opts.ContextProvider(Path)); + WithContext WithProvidedContext(Opts.ContextProvider(Path)); Action(); }); } @@ -1344,6 +1349,7 @@ void TUScheduler::runWithPreamble(llvm::StringRef Name, PathRef File, SPAN_ATTACH(Tracer, "file", File); std::shared_ptr Preamble = It->second->Worker->getPossiblyStalePreamble(); + WithContext WithProvidedContext(Opts.ContextProvider(File)); Action(InputsAndPreamble{It->second->Contents, It->second->Worker->getCurrentCompileCommand(), Preamble.get()}); @@ -1370,9 +1376,7 @@ void TUScheduler::runWithPreamble(llvm::StringRef Name, PathRef File, WithContext Guard(std::move(Ctx)); trace::Span Tracer(Name); SPAN_ATTACH(Tracer, "file", File); - llvm::Optional WithProvidedContext; - if (Opts.ContextProvider) - WithProvidedContext.emplace(Opts.ContextProvider(File)); + WithContext WithProvidedContext(Opts.ContextProvider(File)); Action(InputsAndPreamble{Contents, Command, Preamble.get()}); }; diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h index 05c06da13380d..5d545b366ec3e 100644 --- a/clang-tools-extra/clangd/TUScheduler.h +++ b/clang-tools-extra/clangd/TUScheduler.h @@ -313,7 +313,7 @@ class 
TUScheduler { private: const GlobalCompilationDatabase &CDB; - const Options Opts; + Options Opts; std::unique_ptr Callbacks; // not nullptr Semaphore Barrier; llvm::StringMap> Files; diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 5024ace66b7cc..a22785b01d647 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -8,6 +8,7 @@ #include "index/Background.h" #include "Compiler.h" +#include "Config.h" #include "Headers.h" #include "ParsedAST.h" #include "SourceCode.h" @@ -354,6 +355,14 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { // staleness. std::vector BackgroundIndex::loadProject(std::vector MainFiles) { + // Drop files where background indexing is disabled in config. + if (ContextProvider) + llvm::erase_if(MainFiles, [&](const std::string &TU) { + // Load the config for each TU, as indexing may be selectively enabled. + WithContext WithProvidedContext(ContextProvider(TU)); + return Config::current().Index.Background == + Config::BackgroundPolicy::Skip; + }); Rebuilder.startLoading(); // Load shards for all of the mainfiles. 
const std::vector Result = diff --git a/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp b/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp index 6fc844c189315..80d87aa3f9f51 100644 --- a/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp +++ b/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp @@ -181,7 +181,7 @@ class Lookup : public Command { void run() override { if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { - llvm::outs() + llvm::errs() << "Missing required argument: please provide id or -name.\n"; return; } @@ -189,7 +189,7 @@ class Lookup : public Command { if (ID.getNumOccurrences()) { auto SID = SymbolID::fromStr(ID); if (!SID) { - llvm::outs() << llvm::toString(SID.takeError()) << "\n"; + llvm::errs() << llvm::toString(SID.takeError()) << "\n"; return; } IDs.push_back(*SID); @@ -205,7 +205,7 @@ class Lookup : public Command { llvm::outs() << toYAML(Sym); }); if (!FoundSymbol) - llvm::outs() << "not found\n"; + llvm::errs() << "not found\n"; } }; @@ -228,7 +228,7 @@ class Refs : public Command { void run() override { if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { - llvm::outs() + llvm::errs() << "Missing required argument: please provide id or -name.\n"; return; } @@ -236,14 +236,14 @@ class Refs : public Command { if (ID.getNumOccurrences()) { auto SID = SymbolID::fromStr(ID); if (!SID) { - llvm::outs() << llvm::toString(SID.takeError()) << "\n"; + llvm::errs() << llvm::toString(SID.takeError()) << "\n"; return; } IDs.push_back(*SID); } else { IDs = getSymbolIDsFromIndex(Name, Index); if (IDs.size() > 1) { - llvm::outs() << llvm::formatv( + llvm::errs() << llvm::formatv( "The name {0} is ambiguous, found {1} different " "symbols. 
Please use id flag to disambiguate.\n", Name, IDs.size()); @@ -256,7 +256,7 @@ class Refs : public Command { Index->refs(RefRequest, [&RegexFilter](const Ref &R) { auto U = URI::parse(R.Location.FileURI); if (!U) { - llvm::outs() << U.takeError(); + llvm::errs() << U.takeError(); return; } if (RegexFilter.match(U->body())) @@ -358,7 +358,7 @@ bool runCommand(std::string Request, const SymbolIndex &Index) { return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description, Index); } - llvm::outs() << "Unknown command. Try 'help'.\n"; + llvm::errs() << "Unknown command. Try 'help'.\n"; return false; } @@ -380,7 +380,7 @@ int main(int argc, const char *argv[]) { [&]() { Index = openIndex(IndexLocation); }); if (!Index) { - llvm::outs() << "Failed to open the index.\n"; + llvm::errs() << "Failed to open the index.\n"; return -1; } diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp index 718d623a48456..fecd72806cbc0 100644 --- a/clang-tools-extra/clangd/index/remote/server/Server.cpp +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -141,14 +141,14 @@ int main(int argc, char *argv[]) { llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); if (!llvm::sys::path::is_absolute(IndexRoot)) { - llvm::outs() << "Index root should be an absolute path.\n"; + llvm::errs() << "Index root should be an absolute path.\n"; return -1; } std::unique_ptr Index = openIndex(IndexPath); if (!Index) { - llvm::outs() << "Failed to open the index.\n"; + llvm::errs() << "Failed to open the index.\n"; return -1; } diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 6e3d6a231da1e..7bce1c062e817 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -296,6 +296,14 @@ opt RecoveryASTType{ Hidden, }; +opt FoldingRanges{ + "folding-ranges", + cat(Features), + desc("Enable preview of FoldingRanges 
feature"), + init(false), + Hidden, +}; + opt WorkerThreadsCount{ "j", cat(Misc), @@ -438,7 +446,7 @@ opt EnableConfig{ "\tMac OS: ~/Library/Preferences/\n" "\tOthers: $XDG_CONFIG_HOME, usually ~/.config\n" "Configuration is documented at https://clangd.llvm.org/config.html"), - init(false), + init(true), }; /// Supports a test URI scheme with relaxed constraints for lit tests. @@ -676,6 +684,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var Opts.AsyncThreadsCount = WorkerThreadsCount; Opts.BuildRecoveryAST = RecoveryAST; Opts.PreserveRecoveryASTType = RecoveryASTType; + Opts.FoldingRanges = FoldingRanges; clangd::CodeCompleteOptions CCOpts; CCOpts.IncludeIneligibleResults = IncludeIneligibleResults; @@ -694,9 +703,9 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var CCOpts.RunParser = CodeCompletionParse; RealThreadsafeFS TFS; + std::vector> ProviderStack; std::unique_ptr Config; if (EnableConfig) { - std::vector> ProviderStack; ProviderStack.push_back( config::Provider::fromAncestorRelativeYAMLFiles(".clangd", TFS)); llvm::SmallString<256> UserConfig; @@ -707,7 +716,10 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var } else { elog("Couldn't determine user config file, not loading"); } - Config = config::Provider::combine(std::move(ProviderStack)); + std::vector ProviderPointers; + for (const auto& P : ProviderStack) + ProviderPointers.push_back(P.get()); + Config = config::Provider::combine(std::move(ProviderPointers)); Opts.ConfigProvider = Config.get(); } diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index cb4d23e0be347..70d5156b10723 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -113,7 +113,7 @@ TEST_F(BackgroundIndexTest, Config) { // Set up two identical TUs, foo and bar. 
// They define foo::one and bar::one. std::vector Cmds; - for (std::string Name : {"foo", "bar"}) { + for (std::string Name : {"foo", "bar", "baz"}) { std::string Filename = Name + ".cpp"; std::string Header = Name + ".h"; FS.Files[Filename] = "#include \"" + Header + "\""; @@ -126,11 +126,14 @@ TEST_F(BackgroundIndexTest, Config) { } // Context provider that installs a configuration mutating foo's command. // This causes it to define foo::two instead of foo::one. + // It also disables indexing of baz entirely. auto ContextProvider = [](PathRef P) { Config C; if (P.endswith("foo.cpp")) C.CompileFlags.Edits.push_back( [](std::vector &Argv) { Argv.push_back("-Done=two"); }); + if (P.endswith("baz.cpp")) + C.Index.Background = Config::BackgroundPolicy::Skip; return Context::current().derive(Config::Key, std::move(C)); }; // Create the background index. diff --git a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp index d86296b84e3f9..1acbcd94ac84e 100644 --- a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp @@ -207,6 +207,166 @@ TEST(CommandMangler, ConfigEdits) { EXPECT_THAT(Cmd, ElementsAre(_, "FOO.CC", "--hello", "-fsyntax-only")); } +static std::string strip(llvm::StringRef Arg, llvm::StringRef Argv) { + llvm::SmallVector Parts; + llvm::SplitString(Argv, Parts); + std::vector Args = {Parts.begin(), Parts.end()}; + ArgStripper S; + S.strip(Arg); + S.process(Args); + return llvm::join(Args, " "); +} + +TEST(ArgStripperTest, Spellings) { + // May use alternate prefixes. + EXPECT_EQ(strip("-pedantic", "clang -pedantic foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-pedantic", "clang --pedantic foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("--pedantic", "clang -pedantic foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("--pedantic", "clang --pedantic foo.cc"), "clang foo.cc"); + // May use alternate names. 
+ EXPECT_EQ(strip("-x", "clang -x c++ foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-x", "clang --language=c++ foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("--language=", "clang -x c++ foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("--language=", "clang --language=c++ foo.cc"), + "clang foo.cc"); +} + +TEST(ArgStripperTest, UnknownFlag) { + EXPECT_EQ(strip("-xyzzy", "clang -xyzzy foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-xyz*", "clang -xyzzy foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-xyzzy", "clang -Xclang -xyzzy foo.cc"), "clang foo.cc"); +} + +TEST(ArgStripperTest, Xclang) { + // Flags may be -Xclang escaped. + EXPECT_EQ(strip("-ast-dump", "clang -Xclang -ast-dump foo.cc"), + "clang foo.cc"); + // Args may be -Xclang escaped. + EXPECT_EQ(strip("-add-plugin", "clang -Xclang -add-plugin -Xclang z foo.cc"), + "clang foo.cc"); +} + +TEST(ArgStripperTest, ClangCL) { + // /I is a synonym for -I in clang-cl mode only. + // Not stripped by default. + EXPECT_EQ(strip("-I", "clang -I /usr/inc /Interesting/file.cc"), + "clang /Interesting/file.cc"); + // Stripped when invoked as clang-cl. + EXPECT_EQ(strip("-I", "clang-cl -I /usr/inc /Interesting/file.cc"), + "clang-cl"); + // Stripped when invoked as CL.EXE + EXPECT_EQ(strip("-I", "CL.EXE -I /usr/inc /Interesting/file.cc"), "CL.EXE"); + // Stripped when passed --driver-mode=cl. 
+ EXPECT_EQ(strip("-I", "cc -I /usr/inc /Interesting/file.cc --driver-mode=cl"), + "cc --driver-mode=cl"); +} + +TEST(ArgStripperTest, ArgStyles) { + // Flag + EXPECT_EQ(strip("-Qn", "clang -Qn foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-Qn", "clang -QnZ foo.cc"), "clang -QnZ foo.cc"); + // Joined + EXPECT_EQ(strip("-std=", "clang -std= foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-std=", "clang -std=c++11 foo.cc"), "clang foo.cc"); + // Separate + EXPECT_EQ(strip("-mllvm", "clang -mllvm X foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-mllvm", "clang -mllvmX foo.cc"), "clang -mllvmX foo.cc"); + // RemainingArgsJoined + EXPECT_EQ(strip("/link", "clang-cl /link b c d foo.cc"), "clang-cl"); + EXPECT_EQ(strip("/link", "clang-cl /linka b c d foo.cc"), "clang-cl"); + // CommaJoined + EXPECT_EQ(strip("-Wl,", "clang -Wl,x,y foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-Wl,", "clang -Wl, foo.cc"), "clang foo.cc"); + // MultiArg + EXPECT_EQ(strip("-segaddr", "clang -segaddr a b foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-segaddr", "clang -segaddra b foo.cc"), + "clang -segaddra b foo.cc"); + // JoinedOrSeparate + EXPECT_EQ(strip("-G", "clang -GX foo.cc"), "clang foo.cc"); + EXPECT_EQ(strip("-G", "clang -G X foo.cc"), "clang foo.cc"); + // JoinedAndSeparate + EXPECT_EQ(strip("-plugin-arg-", "clang -cc1 -plugin-arg-X Y foo.cc"), + "clang -cc1 foo.cc"); + EXPECT_EQ(strip("-plugin-arg-", "clang -cc1 -plugin-arg- Y foo.cc"), + "clang -cc1 foo.cc"); +} + +TEST(ArgStripperTest, EndOfList) { + // When we hit the end-of-args prematurely, we don't crash. + // We consume the incomplete args if we've matched the target option. 
+ EXPECT_EQ(strip("-I", "clang -Xclang"), "clang -Xclang"); + EXPECT_EQ(strip("-I", "clang -Xclang -I"), "clang"); + EXPECT_EQ(strip("-I", "clang -I -Xclang"), "clang"); + EXPECT_EQ(strip("-I", "clang -I"), "clang"); +} + +TEST(ArgStripperTest, Multiple) { + ArgStripper S; + S.strip("-o"); + S.strip("-c"); + std::vector Args = {"clang", "-o", "foo.o", "foo.cc", "-c"}; + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "foo.cc")); +} + +TEST(ArgStripperTest, Warning) { + { + // -W is a flag name + ArgStripper S; + S.strip("-W"); + std::vector Args = {"clang", "-Wfoo", "-Wno-bar", "-Werror", + "foo.cc"}; + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "foo.cc")); + } + { + // -Wfoo is not a flag name, matched literally. + ArgStripper S; + S.strip("-Wunused"); + std::vector Args = {"clang", "-Wunused", "-Wno-unused", + "foo.cc"}; + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "-Wno-unused", "foo.cc")); + } +} + +TEST(ArgStripperTest, Define) { + { + // -D is a flag name + ArgStripper S; + S.strip("-D"); + std::vector Args = {"clang", "-Dfoo", "-Dbar=baz", "foo.cc"}; + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "foo.cc")); + } + { + // -Dbar is not: matched literally + ArgStripper S; + S.strip("-Dbar"); + std::vector Args = {"clang", "-Dfoo", "-Dbar=baz", "foo.cc"}; + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "-Dfoo", "-Dbar=baz", "foo.cc")); + S.strip("-Dfoo"); + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "-Dbar=baz", "foo.cc")); + S.strip("-Dbar=*"); + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "foo.cc")); + } +} + +TEST(ArgStripperTest, OrderDependent) { + ArgStripper S; + // If -include is stripped first, we see -pch as its arg and foo.pch remains. + // To get this case right, we must process -include-pch first. 
+ S.strip("-include"); + S.strip("-include-pch"); + std::vector Args = {"clang", "-include-pch", "foo.pch", + "foo.cc"}; + S.process(Args); + EXPECT_THAT(Args, ElementsAre("clang", "foo.cc")); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index 825d6878727d9..c8465dc70edbc 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -91,13 +91,31 @@ TEST_F(ConfigCompileTests, Condition) { TEST_F(ConfigCompileTests, CompileCommands) { Frag.CompileFlags.Add.emplace_back("-foo"); - std::vector Argv = {"clang", "a.cc"}; + Frag.CompileFlags.Remove.emplace_back("--include-directory="); + std::vector Argv = {"clang", "-I", "bar/", "a.cc"}; EXPECT_TRUE(compileAndApply()); - EXPECT_THAT(Conf.CompileFlags.Edits, SizeIs(1)); - Conf.CompileFlags.Edits.front()(Argv); + EXPECT_THAT(Conf.CompileFlags.Edits, SizeIs(2)); + for (auto &Edit : Conf.CompileFlags.Edits) + Edit(Argv); EXPECT_THAT(Argv, ElementsAre("clang", "a.cc", "-foo")); } +TEST_F(ConfigCompileTests, Index) { + Frag.Index.Background.emplace("Skip"); + EXPECT_TRUE(compileAndApply()); + EXPECT_EQ(Conf.Index.Background, Config::BackgroundPolicy::Skip); + + Frag = {}; + Frag.Index.Background.emplace("Foo"); + EXPECT_TRUE(compileAndApply()); + EXPECT_EQ(Conf.Index.Background, Config::BackgroundPolicy::Build) + << "by default"; + EXPECT_THAT( + Diags.Diagnostics, + ElementsAre(DiagMessage( + "Invalid Background value 'Foo'. 
Valid values are Build, Skip."))); +} + } // namespace } // namespace config } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp index 122b55cf64e01..0cf582410ff81 100644 --- a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp @@ -10,10 +10,11 @@ #include "ConfigProvider.h" #include "ConfigTesting.h" #include "TestFS.h" +#include "llvm/Support/SourceMgr.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "llvm/Support/SourceMgr.h" #include +#include namespace clang { namespace clangd { @@ -56,10 +57,9 @@ std::vector getAddedArgs(Config &C) { // cache their results. TEST(ProviderTest, Combine) { CapturedDiags Diags; - std::vector> Providers; - Providers.push_back(std::make_unique("foo")); - Providers.push_back(std::make_unique("bar")); - auto Combined = Provider::combine(std::move(Providers)); + FakeProvider Foo("foo"); + FakeProvider Bar("bar"); + auto Combined = Provider::combine({&Foo, &Bar}); Config Cfg = Combined->getConfig(Params(), Diags.callback()); EXPECT_THAT(Diags.Diagnostics, ElementsAre(DiagMessage("foo"), DiagMessage("bar"))); @@ -150,6 +150,43 @@ TEST(ProviderTest, FromAncestorRelativeYAMLFiles) { EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz")); } +TEST(ProviderTest, Staleness) { + MockFS FS; + + auto StartTime = std::chrono::steady_clock::now(); + Params StaleOK; + StaleOK.FreshTime = StartTime; + Params MustBeFresh; + MustBeFresh.FreshTime = StartTime + std::chrono::hours(1); + CapturedDiags Diags; + auto P = Provider::fromYAMLFile(testPath("foo.yaml"), FS); + + // Initial query always reads, regardless of policy. 
+ FS.Files["foo.yaml"] = AddFooWithErr; + auto Cfg = P->getConfig(StaleOK, Diags.callback()); + EXPECT_THAT(Diags.Diagnostics, + ElementsAre(DiagMessage("Unknown CompileFlags key Unknown"))); + EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("foo")); + Diags.Diagnostics.clear(); + + // Stale value reused by policy. + FS.Files["foo.yaml"] = AddBarBaz; + Cfg = P->getConfig(StaleOK, Diags.callback()); + EXPECT_THAT(Diags.Diagnostics, IsEmpty()) << "Cached, not re-parsed"; + EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("foo")); + + // Cache revalidated by policy. + Cfg = P->getConfig(MustBeFresh, Diags.callback()); + EXPECT_THAT(Diags.Diagnostics, IsEmpty()) << "New config, no errors"; + EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz")); + + // Cache revalidated by (default) policy. + FS.Files.erase("foo.yaml"); + Cfg = P->getConfig(Params(), Diags.callback()); + EXPECT_THAT(Diags.Diagnostics, IsEmpty()); + EXPECT_THAT(getAddedArgs(Cfg), IsEmpty()); +} + } // namespace } // namespace config } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp index 31879e356ce0e..07c42fcf20304 100644 --- a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp @@ -35,7 +35,7 @@ MATCHER_P(QName, Name, "") { } MATCHER_P(WithName, N, "") { return arg.name == N; } MATCHER_P(WithKind, Kind, "") { return arg.kind == Kind; } -MATCHER_P(SymRange, Range, "") { return arg.location.range == Range; } +MATCHER_P(SymRange, Range, "") { return arg.range == Range; } // GMock helpers for matching DocumentSymbol. 
MATCHER_P(SymNameRange, Range, "") { return arg.selectionRange == Range; } @@ -712,6 +712,72 @@ TEST(DocumentSymbols, QualifiersWithTemplateArgs) { WithName("Foo_type::method3"))); } +TEST(DocumentSymbolsTest, Ranges) { + TestTU TU; + Annotations Main(R"( + $foo[[int foo(bool Argument) { + return 42; + }]] + + $variable[[char GLOBAL_VARIABLE]]; + + $ns[[namespace ns { + $bar[[class Bar { + public: + $ctor[[Bar() {}]] + $dtor[[~Bar()]]; + + private: + $field[[unsigned Baz]]; + + $getbaz[[unsigned getBaz() { return Baz; }]] + }]]; + }]] // namespace ns + + $forwardclass[[class ForwardClassDecl]]; + + $struct[[struct StructDefinition { + $structfield[[int *Pointer = nullptr]]; + }]]; + $forwardstruct[[struct StructDeclaration]]; + + $forwardfunc[[void forwardFunctionDecl(int Something)]]; + )"); + TU.Code = Main.code().str(); + EXPECT_THAT( + getSymbols(TU.build()), + UnorderedElementsAre( + AllOf(WithName("foo"), WithKind(SymbolKind::Function), + SymRange(Main.range("foo"))), + AllOf(WithName("GLOBAL_VARIABLE"), WithKind(SymbolKind::Variable), + SymRange(Main.range("variable"))), + AllOf( + WithName("ns"), WithKind(SymbolKind::Namespace), + SymRange(Main.range("ns")), + Children(AllOf( + WithName("Bar"), WithKind(SymbolKind::Class), + SymRange(Main.range("bar")), + Children( + AllOf(WithName("Bar"), WithKind(SymbolKind::Constructor), + SymRange(Main.range("ctor"))), + AllOf(WithName("~Bar"), WithKind(SymbolKind::Constructor), + SymRange(Main.range("dtor"))), + AllOf(WithName("Baz"), WithKind(SymbolKind::Field), + SymRange(Main.range("field"))), + AllOf(WithName("getBaz"), WithKind(SymbolKind::Method), + SymRange(Main.range("getbaz"))))))), + AllOf(WithName("ForwardClassDecl"), WithKind(SymbolKind::Class), + SymRange(Main.range("forwardclass"))), + AllOf(WithName("StructDefinition"), WithKind(SymbolKind::Struct), + SymRange(Main.range("struct")), + Children(AllOf(WithName("Pointer"), WithKind(SymbolKind::Field), + SymRange(Main.range("structfield"))))), + 
AllOf(WithName("StructDeclaration"), WithKind(SymbolKind::Struct), + SymRange(Main.range("forwardstruct"))), + AllOf(WithName("forwardFunctionDecl"), WithKind(SymbolKind::Function), + SymRange(Main.range("forwardfunc"))))); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp index e68b8d727172e..ef9a299483f62 100644 --- a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -313,9 +313,22 @@ TEST(GlobalCompilationDatabaseTest, NonCanonicalFilenames) { llvm::sys::path::append(File, "blabla", "..", "a.cc"); EXPECT_TRUE(DB.getCompileCommand(File)); - EXPECT_TRUE(DB.getProjectInfo(File)); + EXPECT_FALSE(DB.getProjectInfo(File)); } +TEST_F(OverlayCDBTest, GetProjectInfo) { + OverlayCDB DB(Base.get()); + Path File = testPath("foo.cc"); + Path Header = testPath("foo.h"); + + EXPECT_EQ(DB.getProjectInfo(File)->SourceRoot, testRoot()); + EXPECT_EQ(DB.getProjectInfo(Header)->SourceRoot, testRoot()); + + // Shouldn't change after an override. + DB.setCompileCommand(File, tooling::CompileCommand()); + EXPECT_EQ(DB.getProjectInfo(File)->SourceRoot, testRoot()); + EXPECT_EQ(DB.getProjectInfo(Header)->SourceRoot, testRoot()); +} } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp index 8c9669a945dd7..8482a1cc8237c 100644 --- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp +++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp @@ -230,8 +230,8 @@ std::string getPreamblePatch(llvm::StringRef Baseline, TEST(PreamblePatchTest, Define) { // BAR should be defined while parsing the AST. 
struct { - llvm::StringLiteral Contents; - llvm::StringLiteral ExpectedPatch; + const char *const Contents; + const char *const ExpectedPatch; } Cases[] = { { R"cpp( @@ -270,7 +270,7 @@ TEST(PreamblePatchTest, Define) { SCOPED_TRACE(Case.Contents); Annotations Modified(Case.Contents); EXPECT_THAT(getPreamblePatch("", Modified.code()), - MatchesRegex(Case.ExpectedPatch.str())); + MatchesRegex(Case.ExpectedPatch)); auto AST = createPatchedAST("", Modified.code()); ASSERT_TRUE(AST); @@ -304,8 +304,8 @@ TEST(PreamblePatchTest, OrderingPreserved) { TEST(PreamblePatchTest, LocateMacroAtWorks) { struct { - llvm::StringLiteral Baseline; - llvm::StringLiteral Modified; + const char *const Baseline; + const char *const Modified; } Cases[] = { // Addition of new directive { @@ -417,8 +417,8 @@ TEST(PreamblePatchTest, LocateMacroAtDeletion) { TEST(PreamblePatchTest, RefsToMacros) { struct { - llvm::StringLiteral Baseline; - llvm::StringLiteral Modified; + const char *const Baseline; + const char *const Modified; } Cases[] = { // Newly added { @@ -491,8 +491,8 @@ TEST(TranslatePreamblePatchLocation, Simple) { TEST(PreamblePatch, ModifiedBounds) { struct { - llvm::StringLiteral Baseline; - llvm::StringLiteral Modified; + const char *const Baseline; + const char *const Modified; } Cases[] = { // Size increased { diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index 6f8c10e966a88..051580ba6e49b 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -453,6 +453,8 @@ TEST(SelectionTree, Metrics) { auto T = makeSelectionTree(Code, AST); EXPECT_THAT(Tracer.takeMetric("selection_recovery"), testing::ElementsAreArray({1})); + EXPECT_THAT(Tracer.takeMetric("selection_recovery_type"), + testing::ElementsAreArray({1})); } // FIXME: Doesn't select the binary operator node in diff --git 
a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp index cd1d798341676..5c1a80aae7f9c 100644 --- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp @@ -17,15 +17,19 @@ #include "TestTU.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include + namespace clang { namespace clangd { namespace { + using ::testing::ElementsAre; using ::testing::ElementsAreArray; +using ::testing::UnorderedElementsAreArray; // front() is SR.range, back() is outermost range. std::vector gatherRanges(const SelectionRange &SR) { @@ -35,6 +39,20 @@ std::vector gatherRanges(const SelectionRange &SR) { return Ranges; } +std::vector +gatherFoldingRanges(llvm::ArrayRef FoldingRanges) { + std::vector Ranges; + Range NextRange; + for (const auto &R : FoldingRanges) { + NextRange.start.line = R.startLine; + NextRange.start.character = R.startCharacter; + NextRange.end.line = R.endLine; + NextRange.end.character = R.endCharacter; + Ranges.push_back(NextRange); + } + return Ranges; +} + TEST(SemanticSelection, All) { const char *Tests[] = { R"cpp( // Single statement in a function body. @@ -118,16 +136,16 @@ TEST(SemanticSelection, All) { )cpp", R"cpp( // Inside struct. struct A { static int a(); }; - [[struct B { + [[struct B { [[static int b() [[{ [[return [[[[1^1]] + 2]]]]; }]]]] }]]; )cpp", // Namespaces. 
- R"cpp( - [[namespace nsa { - [[namespace nsb { + R"cpp( + [[namespace nsa { + [[namespace nsb { static int ccc(); [[void func() [[{ // int x = nsa::nsb::ccc(); @@ -181,6 +199,41 @@ TEST(SemanticSelection, RunViaClangdServer) { EXPECT_THAT(gatherRanges(Ranges->back()), ElementsAre(SourceAnnotations.range("empty"))); } + +TEST(FoldingRanges, All) { + const char *Tests[] = { + R"cpp( + [[int global_variable]]; + + [[void func() { + int v = 100; + }]] + )cpp", + R"cpp( + [[class Foo { + public: + [[Foo() { + int X = 1; + }]] + + private: + [[int getBar() { + return 42; + }]] + + [[void getFooBar() { }]] + }]]; + )cpp", + }; + for (const char *Test : Tests) { + auto T = Annotations(Test); + auto AST = TestTU::withCode(T.code()).build(); + EXPECT_THAT(gatherFoldingRanges(llvm::cantFail(getFoldingRanges(AST))), + UnorderedElementsAreArray(T.ranges())) + << Test; + } +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp index f40377fd5d85b..61ac4f7a27a4a 100644 --- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp +++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp @@ -864,25 +864,28 @@ TEST_F(TUSchedulerTests, NoChangeDiags) { } TEST_F(TUSchedulerTests, Run) { - auto Opts = optsForTest(); - Opts.ContextProvider = bindPath; - TUScheduler S(CDB, Opts); - std::atomic Counter(0); - S.run("add 1", /*Path=*/"", [&] { ++Counter; }); - S.run("add 2", /*Path=*/"", [&] { Counter += 2; }); - ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); - EXPECT_EQ(Counter.load(), 3); - - Notification TaskRun; - Key TestKey; - WithContextValue CtxWithKey(TestKey, 10); - const char *Path = "somepath"; - S.run("props context", Path, [&] { - EXPECT_EQ(Context::current().getExisting(TestKey), 10); - EXPECT_EQ(Path, boundPath()); - TaskRun.notify(); - }); - TaskRun.wait(); + for (bool Sync : {false, true}) { + auto Opts = optsForTest(); + if 
(Sync) + Opts.AsyncThreadsCount = 0; + TUScheduler S(CDB, Opts); + std::atomic Counter(0); + S.run("add 1", /*Path=*/"", [&] { ++Counter; }); + S.run("add 2", /*Path=*/"", [&] { Counter += 2; }); + ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); + EXPECT_EQ(Counter.load(), 3); + + Notification TaskRun; + Key TestKey; + WithContextValue CtxWithKey(TestKey, 10); + const char *Path = "somepath"; + S.run("props context", Path, [&] { + EXPECT_EQ(Context::current().getExisting(TestKey), 10); + EXPECT_EQ(Path, boundPath()); + TaskRun.notify(); + }); + TaskRun.wait(); + } } TEST_F(TUSchedulerTests, TUStatus) { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index c08fd45c2f967..1d447938eae0c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -1,5 +1,5 @@ ==================================================== -Extra Clang Tools 11.0.0 (In-Progress) Release Notes +Extra Clang Tools 12.0.0 (In-Progress) Release Notes ==================================================== .. contents:: @@ -10,7 +10,7 @@ Written by the `LLVM Team `_ .. warning:: - These are in-progress notes for the upcoming Extra Clang Tools 11 release. + These are in-progress notes for the upcoming Extra Clang Tools 12 release. Release notes for previous releases can be found on `the Download Page `_. @@ -18,7 +18,7 @@ Introduction ============ This document contains the release notes for the Extra Clang Tools, part of the -Clang release 11.0.0. Here we describe the status of the Extra Clang Tools in +Clang release 12.0.0. Here we describe the status of the Extra Clang Tools in some detail, including major improvements from the previous release and new feature work. All LLVM releases may be downloaded from the `LLVM releases web site `_. @@ -32,7 +32,7 @@ main Clang web page, this document applies to the *next* release, not the current one. 
To see the release notes for a specific release, please see the `releases page `_. -What's New in Extra Clang Tools 11.0.0? +What's New in Extra Clang Tools 12.0.0? ======================================= Some of the major new features and improvements to Extra Clang Tools are listed @@ -67,187 +67,7 @@ The improvements are... Improvements to clang-tidy -------------------------- -New module -^^^^^^^^^^ -- New module `llvmlibc`. - - This module contains checks related to the LLVM-libc coding standards. - -New checks -^^^^^^^^^^ - -- New :doc:`abseil-string-find-str-contains - ` check. - - Finds ``s.find(...) == string::npos`` comparisons (for various string-like types) - and suggests replacing with ``absl::StrContains()``. - -- New :doc:`cppcoreguidelines-avoid-non-const-global-variables - ` check. - Finds non-const global variables as described in check I.2 of C++ Core - Guidelines. - -- New :doc:`bugprone-misplaced-pointer-arithmetic-in-alloc - ` check. - - Finds cases where an integer expression is added to or subtracted from the - result of a memory allocation function (``malloc()``, ``calloc()``, - ``realloc()``, ``alloca()``) instead of its argument. - -- New :doc:`bugprone-no-escape - ` check. - - Finds pointers with the ``noescape`` attribute that are captured by an - asynchronously-executed block. - -- New :doc:`bugprone-spuriously-wake-up-functions - ` check. - - Finds ``cnd_wait``, ``cnd_timedwait``, ``wait``, ``wait_for``, or - ``wait_until`` function calls when the function is not invoked from a loop - that checks whether a condition predicate holds or the function has a - condition parameter. - -- New :doc:`bugprone-reserved-identifier - ` check. - - Checks for usages of identifiers reserved for use by the implementation. - -- New :doc:`bugprone-suspicious-include - ` check. - - Finds cases where an include refers to what appears to be an implementation - file, which often leads to hard-to-track-down ODR violations, and diagnoses - them. 
- -- New :doc:`cert-oop57-cpp - ` check. - - Flags use of the `C` standard library functions ``memset``, ``memcpy`` and - ``memcmp`` and similar derivatives on non-trivial types. - -- New :doc:`llvmlibc-callee-namespace - ` check. - - Checks all calls resolve to functions within ``__llvm_libc`` namespace. - -- New :doc:`llvmlibc-implementation-in-namespace - ` check. - - Checks all llvm-libc implementation is within the correct namespace. - -- New :doc:`llvmlibc-restrict-system-libc-headers - ` check. - - Finds includes of system libc headers not provided by the compiler within - llvm-libc implementations. - -- New :doc:`modernize-replace-disallow-copy-and-assign-macro - ` check. - - Finds macro expansions of ``DISALLOW_COPY_AND_ASSIGN`` and replaces them with - a deleted copy constructor and a deleted assignment operator. - -- New :doc:`objc-dealloc-in-category - ` check. - - Finds implementations of -dealloc in Objective-C categories. - -- New :doc:`misc-no-recursion - ` check. - - Finds recursive functions and diagnoses them. - -- New :doc:`objc-nsinvocation-argument-lifetime - ` check. - - Finds calls to ``NSInvocation`` methods under ARC that don't have proper - argument object lifetimes. - -- New :doc:`readability-use-anyofallof - ` check. - - Finds range-based for loops that can be replaced by a call to ``std::any_of`` - or ``std::all_of``. - -New check aliases -^^^^^^^^^^^^^^^^^ - -- New alias :doc:`cert-con36-c - ` to - :doc:`bugprone-spuriously-wake-up-functions - ` was added. - -- New alias :doc:`cert-con54-cpp - ` to - :doc:`bugprone-spuriously-wake-up-functions - ` was added. - -- New alias :doc:`cert-dcl37-c - ` to - :doc:`bugprone-reserved-identifier - ` was added. - -- New alias :doc:`cert-dcl51-cpp - ` to - :doc:`bugprone-reserved-identifier - ` was added. - -- New alias :doc:`cert-str34-c - ` to - :doc:`bugprone-signed-char-misuse - ` was added. - -- New alias :doc:`llvm-else-after-return - ` to - :doc:`readability-else-after-return - ` was added. 
- -Changes in existing checks -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Improved :doc:`performance-faster-string-find - ` check. - - Now checks ``std::basic_string_view`` by default. - -- Improved :doc:`readability-else-after-return - ` check now supports a - `WarnOnConditionVariables` option to control whether to refactor condition - variables where possible. - -- Improved :doc:`readability-identifier-naming - ` check. - - Now able to rename member references in class template definitions with - explicit access. - -- Improved :doc:`readability-qualified-auto - ` check now supports a - `AddConstToQualified` to enable adding ``const`` qualifiers to variables - typed with ``auto *`` and ``auto &``. - -- Improved :doc:`readability-redundant-string-init - ` check now supports a - `StringNames` option enabling its application to custom string classes. The - check now detects in class initializers and constructor initializers which - are deemed to be redundant. - -- Checks supporting the ``HeaderFileExtensions`` flag now support ``;`` as a - delimiter in addition to ``,``, with the latter being deprecated as of this - release. This simplifies how one specifies the options on the command line: - ``--config="{CheckOptions: [{ key: HeaderFileExtensions, value: h;;hpp;hxx }]}"`` - -Renamed checks -^^^^^^^^^^^^^^ - -- The 'fuchsia-restrict-system-headers' check was renamed to :doc:`portability-restrict-system-includes - ` - -Other improvements -^^^^^^^^^^^^^^^^^^ - -- For 'run-clang-tidy.py' add option to use alpha checkers from clang-analyzer. +The improvements are... 
Improvements to include-fixer ----------------------------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst b/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst index 4223a10bd6e9b..77114100ba1cb 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst @@ -51,3 +51,12 @@ Example // WARNING: OpenMP directive ``parallel`` specifies ``default(shared)`` // clause. Consider using ``default(none)`` clause instead. } + + // ``parallel`` directive can have ``default`` clause, and said clause is + // specified, but with ``firstprivate`` kind, which is not ``none``, diagnose. + void p0_3() { + #pragma omp parallel default(firstprivate) + ; + // WARNING: OpenMP directive ``parallel`` specifies ``default(firstprivate)`` + // clause. Consider using ``default(none)`` clause instead. + } diff --git a/clang-tools-extra/docs/conf.py b/clang-tools-extra/docs/conf.py index 690917ef9a1be..a7579d55737e2 100644 --- a/clang-tools-extra/docs/conf.py +++ b/clang-tools-extra/docs/conf.py @@ -49,9 +49,9 @@ # built documents. # # The short version. -version = '11' +version = '12' # The full version, including alpha/beta/rc tags. -release = '11' +release = '12' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp b/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp index 35d2d17b1e0e8..d1d3b0e441f3f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp @@ -1,5 +1,5 @@ -// RUN: %check_clang_tidy %s openmp-use-default-none %t -- -- -fopenmp=libomp -fopenmp-version=40 -// RUN: %check_clang_tidy -std=c11 %s openmp-use-default-none %t -- -- -x c -fopenmp=libomp -fopenmp-version=40 +// RUN: %check_clang_tidy %s openmp-use-default-none %t -- -- -fopenmp=libomp -fopenmp-version=51 +// RUN: %check_clang_tidy -std=c11 %s openmp-use-default-none %t -- -- -x c -fopenmp=libomp -fopenmp-version=51 //----------------------------------------------------------------------------// // Null cases. @@ -42,6 +42,15 @@ void p0_2() { // CHECK-NOTES: :[[@LINE-3]]:22: note: existing 'default' clause specified here } +// 'parallel' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p0_3() { +#pragma omp parallel default(firstprivate) + ; + // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'parallel' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-3]]:22: note: existing 'default' clause specified here +} + // 'task' directive. // 'task' directive can have 'default' clause, but said clause is not @@ -68,6 +77,15 @@ void p1_2() { // CHECK-NOTES: :[[@LINE-3]]:18: note: existing 'default' clause specified here } +// 'task' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. 
+void p1_3() { +#pragma omp task default(firstprivate) + ; + // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'task' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-3]]:18: note: existing 'default' clause specified here +} + // 'teams' directive. (has to be inside of 'target' directive) // 'teams' directive can have 'default' clause, but said clause is not @@ -97,6 +115,16 @@ void p2_2() { // CHECK-NOTES: :[[@LINE-3]]:19: note: existing 'default' clause specified here } +// 'teams' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p2_3() { +#pragma omp target +#pragma omp teams default(firstprivate) + ; + // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'teams' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-3]]:19: note: existing 'default' clause specified here +} + // 'taskloop' directive. // 'taskloop' directive can have 'default' clause, but said clause is not @@ -126,6 +154,16 @@ void p3_2(const int a) { // CHECK-NOTES: :[[@LINE-4]]:22: note: existing 'default' clause specified here } +// 'taskloop' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p3_3(const int a) { +#pragma omp taskloop default(firstprivate) + for (int b = 0; b < a; b++) + ; + // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'taskloop' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-4]]:22: note: existing 'default' clause specified here +} + //----------------------------------------------------------------------------// // Combined directives. 
// Let's not test every single possible permutation/combination of directives, @@ -158,3 +196,13 @@ void p4_2(const int a) { // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'parallel for' specifies 'default(shared)' clause, consider using 'default(none)' clause instead // CHECK-NOTES: :[[@LINE-4]]:26: note: existing 'default' clause specified here } + +// 'parallel' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p4_3(const int a) { +#pragma omp parallel for default(firstprivate) + for (int b = 0; b < a; b++) + ; + // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'parallel for' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-4]]:26: note: existing 'default' clause specified here +} diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/config-files.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/config-files.cpp index ee1ed49472baa..d2a0a8c2a150f 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/config-files.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/config-files.cpp @@ -18,13 +18,13 @@ // RUN: clang-tidy -dump-config %S/Inputs/config-files/4/44/- -- | FileCheck %s -check-prefix=CHECK-CHILD4 // CHECK-CHILD4: Checks: {{.*}}modernize-loop-convert,modernize-use-using,llvm-qualified-auto // CHECK-CHILD4: - key: llvm-qualified-auto.AddConstToQualified -// CHECK-CHILD4-NEXT: value: '1' +// CHECK-CHILD4-NEXT: value: 'true' // CHECK-CHILD4: - key: modernize-loop-convert.MaxCopySize // CHECK-CHILD4-NEXT: value: '20' // CHECK-CHILD4: - key: modernize-loop-convert.MinConfidence // CHECK-CHILD4-NEXT: value: reasonable // CHECK-CHILD4: - key: modernize-use-using.IgnoreMacros -// CHECK-CHILD4-NEXT: value: '0' +// CHECK-CHILD4-NEXT: value: 'false' // RUN: clang-tidy --explain-config %S/Inputs/config-files/4/44/- -- | FileCheck %s -check-prefix=CHECK-EXPLAIN // 
CHECK-EXPLAIN: 'llvm-qualified-auto' is enabled in the {{.*}}{{[/\\]}}Inputs{{[/\\]}}config-files{{[/\\]}}4{{[/\\]}}44{{[/\\]}}.clang-tidy. @@ -42,7 +42,7 @@ // CHECK-CHILD5: - key: modernize-loop-convert.MinConfidence // CHECK-CHILD5-NEXT: value: reasonable // CHECK-CHILD5: - key: modernize-use-using.IgnoreMacros -// CHECK-CHILD5-NEXT: value: '0' +// CHECK-CHILD5-NEXT: value: 'false' // RUN: clang-tidy -dump-config \ // RUN: --config='{InheritParentConfig: false, \ diff --git a/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp b/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp index a089281bf16c4..63f9a06e91bec 100644 --- a/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp @@ -6,6 +6,20 @@ namespace clang { namespace tidy { + +enum class Colours { Red, Orange, Yellow, Green, Blue, Indigo, Violet }; + +template <> struct OptionEnumMapping { + static llvm::ArrayRef> getEnumMapping() { + static constexpr std::pair Mapping[] = { + {Colours::Red, "Red"}, {Colours::Orange, "Orange"}, + {Colours::Yellow, "Yellow"}, {Colours::Green, "Green"}, + {Colours::Blue, "Blue"}, {Colours::Indigo, "Indigo"}, + {Colours::Violet, "Violet"}}; + return makeArrayRef(Mapping); + } +}; + namespace test { TEST(ParseLineFilter, EmptyFilter) { @@ -208,16 +222,10 @@ TEST(CheckOptionsValidation, ValidIntOptions) { #undef CHECK_ERROR_INT } +// FIXME: Figure out why this test causes crashes on mac os. 
+#ifndef __APPLE__ TEST(ValidConfiguration, ValidEnumOptions) { - enum class Colours { Red, Orange, Yellow, Green, Blue, Indigo, Violet }; - static constexpr std::pair Mapping[] = { - {"Red", Colours::Red}, {"Orange", Colours::Orange}, - {"Yellow", Colours::Yellow}, {"Green", Colours::Green}, - {"Blue", Colours::Blue}, {"Indigo", Colours::Indigo}, - {"Violet", Colours::Violet}}; - static const auto Map = makeArrayRef(Mapping); - ClangTidyOptions Options; auto &CheckOptions = Options.CheckOptions; @@ -237,34 +245,37 @@ TEST(ValidConfiguration, ValidEnumOptions) { #define CHECK_ERROR_ENUM(Name, Expected) \ CHECK_ERROR(Name, UnparseableEnumOptionError, Expected) - CHECK_VAL(TestCheck.getLocal("Valid", Map), Colours::Red); - CHECK_VAL(TestCheck.getGlobal("GlobalValid", Map), Colours::Violet); - CHECK_VAL(TestCheck.getLocal("ValidWrongCase", Map, /*IgnoreCase*/ true), - Colours::Red); + CHECK_VAL(TestCheck.getIntLocal("Valid"), Colours::Red); + CHECK_VAL(TestCheck.getIntGlobal("GlobalValid"), Colours::Violet); + CHECK_VAL( - TestCheck.getGlobal("GlobalValidWrongCase", Map, /*IgnoreCase*/ true), - Colours::Violet); - CHECK_ERROR_ENUM(TestCheck.getLocal("Invalid", Map), + TestCheck.getIntLocal("ValidWrongCase", /*IgnoreCase*/ true), + Colours::Red); + CHECK_VAL(TestCheck.getIntGlobal("GlobalValidWrongCase", + /*IgnoreCase*/ true), + Colours::Violet); + CHECK_ERROR_ENUM(TestCheck.getIntLocal("Invalid"), "invalid configuration value " "'Scarlet' for option 'test.Invalid'"); - CHECK_ERROR_ENUM(TestCheck.getLocal("ValidWrongCase", Map), + CHECK_ERROR_ENUM(TestCheck.getIntLocal("ValidWrongCase"), "invalid configuration value 'rED' for option " "'test.ValidWrongCase'; did you mean 'Red'?"); - CHECK_ERROR_ENUM(TestCheck.getLocal("NearMiss", Map), + CHECK_ERROR_ENUM(TestCheck.getIntLocal("NearMiss"), "invalid configuration value 'Oragne' for option " "'test.NearMiss'; did you mean 'Orange'?"); - CHECK_ERROR_ENUM(TestCheck.getGlobal("GlobalInvalid", Map), + 
CHECK_ERROR_ENUM(TestCheck.getIntGlobal("GlobalInvalid"), "invalid configuration value " "'Purple' for option 'GlobalInvalid'"); - CHECK_ERROR_ENUM(TestCheck.getGlobal("GlobalValidWrongCase", Map), + CHECK_ERROR_ENUM(TestCheck.getIntGlobal("GlobalValidWrongCase"), "invalid configuration value 'vIOLET' for option " "'GlobalValidWrongCase'; did you mean 'Violet'?"); - CHECK_ERROR_ENUM(TestCheck.getGlobal("GlobalNearMiss", Map), + CHECK_ERROR_ENUM(TestCheck.getIntGlobal("GlobalNearMiss"), "invalid configuration value 'Yelow' for option " "'GlobalNearMiss'; did you mean 'Yellow'?"); #undef CHECK_ERROR_ENUM } +#endif #undef CHECK_VAL #undef CHECK_ERROR diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index df12fb6b2049c..a29e29f8ce5c9 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -534,7 +534,7 @@ list(APPEND LLVM_COMMON_DEPENDS clang-tablegen-targets) # Force target to be built as soon as possible. Clang modules builds depend # header-wise on it as they ship all headers from the umbrella folders. Building # an entire module might include header, which depends on intrinsics_gen. -if(LLVM_ENABLE_MODULES AND NOT CLANG_BUILT_STANDALONE) +if(LLVM_ENABLE_MODULES) list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) endif() diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index e84676760c300..6647b117ac596 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2694,8 +2694,11 @@ the configuration (without a prefix: ``Auto``). Use tabs whenever we need to fill whitespace that spans at least from one tab stop to the next one. + + **WhitespaceSensitiveMacros** (``std::vector``) - A vector of macros which are whitespace-sensitive and should not be touched. + A vector of macros which are whitespace-sensitive and should not + be touched. These are expected to be macros of the form: @@ -2709,9 +2712,7 @@ the configuration (without a prefix: ``Auto``). 
WhitespaceSensitiveMacros: ['STRINGIZE', 'PP_STRINGIZE'] - For example: BOOST_PP_STRINGIZE. - - + For example: BOOST_PP_STRINGIZE .. END_FORMAT_STYLE_OPTIONS diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst index 5978650c32888..2cca04fb31f1a 100644 --- a/clang/docs/CommandGuide/clang.rst +++ b/clang/docs/CommandGuide/clang.rst @@ -146,7 +146,7 @@ Language Selection and Mode Options ISO C 2017 with GNU extensions - The default C language standard is ``gnu11``, except on PS4, where it is + The default C language standard is ``gnu17``, except on PS4, where it is ``gnu99``. Supported values for the C++ language are: diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index 2256cbf718698..60ff6ffe60567 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -676,9 +676,10 @@

Node Matchers

#pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(firstprivate) #pragma omp parallel -``ompDefaultClause()`` matches ``default(none)`` and ``default(shared)``. +``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, and ``default(firstprivate)``. @@ -3783,6 +3784,7 @@

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(firstprivate) ``ompDefaultClause(isNoneKind())`` matches only ``default(none)``. @@ -3796,11 +3798,26 @@

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(firstprivate) ``ompDefaultClause(isSharedKind())`` matches only ``default(shared)``. +Matcher<OMPDefaultClause>isFirstPrivateKind +
Matches if the OpenMP ``default`` clause has ``firstprivate`` kind specified.
+
+Given
+
+  #pragma omp parallel
+  #pragma omp parallel default(none)
+  #pragma omp parallel default(shared)
+  #pragma omp parallel default(firstprivate)
+
+``ompDefaultClause(isFirstPrivateKind())`` matches only ``default(firstprivate)``.
+
+ + Matcher<OMPExecutableDirective>isAllowedToContainClauseKindOpenMPClauseKind CKind
Matches if the OpenMP directive is allowed to contain the specified OpenMP
 clause kind.
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 000f23141af30..26fbfab96bc8c 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -268,5 +268,5 @@ want to help with the implementation.
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | loop extension               | Loop tiling transformation                                   | :part:`claimed`          |                                                                       |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
-| device extension             | 'present' map type modifier                                  | :part:`claimed`          |                                                                       |
+| device extension             | 'present' map type modifier                                  | :part:`worked on`        | D83061, D83062                                                        |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 8a9a58aa01f8f..10ead604239c9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1,5 +1,5 @@
 ========================================
-Clang 11.0.0 (In-Progress) Release Notes
+Clang 12.0.0 (In-Progress) Release Notes
 ========================================
 
 .. contents::
@@ -10,7 +10,7 @@ Written by the `LLVM Team `_
 
 .. warning::
 
-   These are in-progress notes for the upcoming Clang 11 release.
+   These are in-progress notes for the upcoming Clang 12 release.
    Release notes for previous releases can be found on
    `the Download Page `_.
 
@@ -18,7 +18,7 @@ Introduction
 ============
 
 This document contains the release notes for the Clang C/C++/Objective-C
-frontend, part of the LLVM Compiler Infrastructure, release 11.0.0. Here we
+frontend, part of the LLVM Compiler Infrastructure, release 12.0.0. Here we
 describe the status of Clang in some detail, including major
 improvements from the previous release and new feature work. For the
 general LLVM release notes, see `the LLVM
@@ -35,7 +35,7 @@ main Clang web page, this document applies to the *next* release, not
 the current one. To see the release notes for a specific release, please
 see the `releases page `_.
 
-What's New in Clang 11.0.0?
+What's New in Clang 12.0.0?
 ===========================
 
 Some of the major new features and improvements to Clang are listed
@@ -51,62 +51,17 @@ Major New Features
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-- -Wpointer-to-int-cast is a new warning group. This group warns about C-style
-  casts of pointers to a integer type too small to hold all possible values.
-
-- -Wuninitialized-const-reference is a new warning controlled by 
-  -Wuninitialized. It warns on cases where uninitialized variables are passed
-  as const reference arguments to a function.
+- ...
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
-- For the ARM target, C-language intrinsics are now provided for the full Arm
-  v8.1-M MVE instruction set. ```` supports the complete API defined
-  in the Arm C Language Extensions.
-
-- For the ARM target, C-language intrinsics ```` for the CDE
-  instruction set are now provided.
-
-- clang adds support for a set of  extended integer types (``_ExtInt(N)``) that
-  permit non-power of 2 integers, exposing the LLVM integer types. Since a major
-  motivating use case for these types is to limit 'bit' usage, these types don't
-  automatically promote to 'int' when operations are done between two
-  ``ExtInt(N)`` types, instead math occurs at the size of the largest
-  ``ExtInt(N)`` type.
-
-- Users of UBSan, PGO, and coverage on Windows will now need to add clang's
-  library resource directory to their library search path. These features all
-  use runtime libraries, and Clang provides these libraries in its resource
-  directory. For example, if LLVM is installed in ``C:\Program Files\LLVM``,
-  then the profile runtime library will appear at
-  ``C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows\clang_rt.profile-x86_64.lib``.
-  To ensure that the linker can find the appropriate library, users should pass
-  ``/LIBPATH:C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows`` to the
-  linker. If the user links the program with the ``clang`` or ``clang-cl``
-  drivers, the driver will pass this flag for them.
-
-- Clang's profile files generated through ``-fprofile-instr-generate`` are using
-  a fixed hashing algorithm that prevents some collision when loading
-  out-of-date profile informations. Clang can still read old profile files.
+- ...
 
 New Compiler Flags
 ------------------
 
-- -fstack-clash-protection will provide a protection against the stack clash
-  attack for x86, s390x and ppc64 architectures through automatic probing of
-  each page of allocated stack.
-
-- -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify
-  the floating-point exception behavior. The default setting is ``ignore``.
-
-- -ffp-model={precise,strict,fast} provides the user an umbrella option to
-  simplify access to the many single purpose floating point options. The default
-  setting is ``precise``.
-
-- The default module cache has moved from /tmp to a per-user cache directory.
-  By default, this is ~/.cache but on some platforms or installations, this
-  might be elsewhere. The -fmodules-cache-path=... flag continues to work.
+- ...
 
 Deprecated Compiler Flags
 -------------------------
@@ -119,29 +74,7 @@ future versions of Clang.
 Modified Compiler Flags
 -----------------------
 
-- -fno-common has been enabled as the default for all targets.  Therefore, C
-  code that uses tentative definitions as definitions of a variable in multiple
-  translation units will trigger multiple-definition linker errors. Generally,
-  this occurs when the use of the ``extern`` keyword is neglected in the
-  declaration of a variable in a header file. In some cases, no specific
-  translation unit provides a definition of the variable. The previous
-  behavior can be restored by specifying ``-fcommon``.
-- -Wasm-ignored-qualifier (ex. `asm const ("")`) has been removed and replaced
-  with an error (this matches a recent change in GCC-9).
-- -Wasm-file-asm-volatile (ex. `asm volatile ("")` at global scope) has been
-  removed and replaced with an error (this matches GCC's behavior).
-- Duplicate qualifiers on asm statements (ex. `asm volatile volatile ("")`) no
-  longer produces a warning via -Wduplicate-decl-specifier, but now an error
-  (this matches GCC's behavior).
-- The deprecated argument ``-f[no-]sanitize-recover`` has changed to mean
-  ``-f[no-]sanitize-recover=all`` instead of
-  ``-f[no-]sanitize-recover=undefined,integer`` and is no longer deprecated.
-- The argument to ``-f[no-]sanitize-trap=...`` is now optional and defaults to
-  ``all``.
-- ``-fno-char8_t`` now disables the ``char8_t`` keyword, not just the use of
-  ``char8_t`` as the character type of ``u8`` literals. This restores the
-  Clang 8 behavior that regressed in Clang 9 and 10.
-- -print-targets has been added to print the registered targets.
+- ...
 
 New Pragmas in Clang
 --------------------
@@ -151,9 +84,7 @@ New Pragmas in Clang
 Attribute Changes in Clang
 --------------------------
 
-- Attributes can now be specified by clang plugins. See the
-  `Clang Plugins `_ documentation for
-  details.
+- ...
 
 Windows Support
 ---------------
@@ -161,58 +92,15 @@ Windows Support
 C Language Changes in Clang
 ---------------------------
 
-- The default C language standard used when `-std=` is not specified has been
-  upgraded from gnu11 to gnu17.
-
-- Clang now supports the GNU C extension `asm inline`; it won't do anything
-  *yet*, but it will be parsed.
-
 - ...
 
 C++ Language Changes in Clang
 -----------------------------
 
-- Clang now implements a restriction on giving non-C-compatible anonymous
-  structs a typedef name for linkage purposes, as described in C++ committee
-  paper `P1766R1 `. This paper was adopted by the
-  C++ committee as a Defect Report resolution, so it is applied retroactively
-  to all C++ standard versions. This affects code such as:
-
-  .. code-block:: c++
-
-    typedef struct {
-      int f() { return 0; }
-    } S;
-
-  Previous versions of Clang rejected some constructs of this form
-  (specifically, where the linkage of the type happened to be computed
-  before the parser reached the typedef name); those cases are still rejected
-  in Clang 11. In addition, cases that previous versions of Clang did not
-  reject now produce an extension warning. This warning can be disabled with
-  the warning flag ``-Wno-non-c-typedef-for-linkage``.
-
-  Affected code should be updated to provide a tag name for the anonymous
-  struct:
-
-  .. code-block:: c++
-
-    struct S {
-      int f() { return 0; }
-    };
-
-  If the code is shared with a C compilation (for example, if the parts that
-  are not C-compatible are guarded with ``#ifdef __cplusplus``), the typedef
-  declaration should be retained, but a tag name should still be provided:
-
-  .. code-block:: c++
-
-    typedef struct S {
-      int f() { return 0; }
-    } S;
+- ...
 
 C++1z Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
-
 ...
 
 Objective-C Language Changes in Clang
@@ -239,49 +127,19 @@ CUDA Support in Clang
 Internal API Changes
 --------------------
 
-These are major API changes that have happened since the 10.0.0 release of
+These are major API changes that have happened since the 11.0.0 release of
 Clang. If upgrading an external codebase that uses Clang as a library,
 this section should help get you past the largest hurdles of upgrading.
 
-- ``RecursiveASTVisitor`` no longer calls separate methods to visit specific
-  operator kinds. Previously, ``RecursiveASTVisitor`` treated unary, binary,
-  and compound assignment operators as if they were subclasses of the
-  corresponding AST node. For example, the binary operator plus was treated as
-  if it was a ``BinAdd`` subclass of the ``BinaryOperator`` class: during AST
-  traversal of a ``BinaryOperator`` AST node that had a ``BO_Add`` opcode,
-  ``RecursiveASTVisitor`` was calling the ``TraverseBinAdd`` method instead of
-  ``TraverseBinaryOperator``. This feature was contributing a non-trivial
-  amount of complexity to the implementation of ``RecursiveASTVisitor``, it was
-  used only in a minor way in Clang, was not tested, and as a result it was
-  buggy. Furthermore, this feature was creating a non-uniformity in the API.
-  Since this feature was not documented, it was quite difficult to figure out
-  how to use ``RecursiveASTVisitor`` to visit operators.
-
-  To update your code to the new uniform API, move the code from separate
-  visitation methods into methods that correspond to the actual AST node and
-  perform case analysis based on the operator opcode as needed:
-
-  * ``TraverseUnary*() => TraverseUnaryOperator()``
-  * ``WalkUpFromUnary*() => WalkUpFromUnaryOperator()``
-  * ``VisitUnary*() => VisiUnaryOperator()``
-  * ``TraverseBin*() => TraverseBinaryOperator()``
-  * ``WalkUpFromBin*() => WalkUpFromBinaryOperator()``
-  * ``VisitBin*() => VisiBinaryOperator()``
-  * ``TraverseBin*Assign() => TraverseCompoundAssignOperator()``
-  * ``WalkUpFromBin*Assign() => WalkUpFromCompoundAssignOperator()``
-  * ``VisitBin*Assign() => VisiCompoundAssignOperator()``
+- ...
 
 Build System Changes
 --------------------
 
-These are major changes to the build system that have happened since the 10.0.0
+These are major changes to the build system that have happened since the 11.0.0
 release of Clang. Users of the build system should adjust accordingly.
 
-- clang-tidy and clang-include-fixer are no longer compiled into libclang by
-  default. You can set ``LIBCLANG_INCLUDE_CLANG_TOOLS_EXTRA=ON`` to undo that,
-  but it's expected that that setting will go away eventually. If this is
-  something you need, please reach out to the mailing list to discuss possible
-  ways forward.
+- ...
 
 AST Matchers
 ------------
@@ -291,103 +149,7 @@ AST Matchers
 clang-format
 ------------
 
-- Option ``IndentExternBlock`` has been added to optionally apply indenting inside ``extern "C"`` and ``extern "C++"`` blocks.
-
-- ``IndentExternBlock`` option accepts ``AfterExternBlock`` to use the old behavior, as well as Indent and NoIndent options, which map to true and false, respectively.
-
-  .. code-block:: c++
-
-    Indent:                       NoIndent:
-     #ifdef __cplusplus          #ifdef __cplusplus
-     extern "C" {                extern "C++" {
-     #endif                      #endif
-
-          void f(void);          void f(void);
-
-     #ifdef __cplusplus          #ifdef __cplusplus
-     }                           }
-     #endif                      #endif
-
-- Option ``IndentCaseBlocks`` has been added to support treating the block
-  following a switch case label as a scope block which gets indented itself.
-  It helps avoid having the closing bracket align with the switch statement's
-  closing bracket (when ``IndentCaseLabels`` is ``false``).
-
-  .. code-block:: c++
-
-    switch (fool) {                vs.     switch (fool) {
-    case 1:                                case 1: {
-      {                                      bar();
-         bar();                            } break;
-      }                                    default: {
-      break;                                 plop();
-    default:                               }
-      {                                    }
-        plop();
-      }
-    }
-
-- Option ``ObjCBreakBeforeNestedBlockParam`` has been added to optionally apply
-  linebreaks for function arguments declarations before nested blocks.
-
-- Option ``InsertTrailingCommas`` can be set to ``TCS_Wrapped`` to insert
-  trailing commas in container literals (arrays and objects) that wrap across
-  multiple lines. It is currently only available for JavaScript and disabled by
-  default (``TCS_None``).
-
-- Option ``BraceWrapping.BeforeLambdaBody`` has been added to manage lambda
-  line break inside function parameter call in Allman style.
-
-  .. code-block:: c++
-
-      true:
-      connect(
-        []()
-        {
-          foo();
-          bar();
-        });
-
-      false:
-      connect([]() {
-          foo();
-          bar();
-        });
-
-- Option ``AlignConsecutiveBitFields`` has been added to align bit field
-  declarations across multiple adjacent lines
-
-  .. code-block:: c++
-
-      true:
-        bool aaa  : 1;
-        bool a    : 1;
-        bool bb   : 1;
-
-      false:
-        bool aaa : 1;
-        bool a : 1;
-        bool bb : 1;
-
-- Option ``BraceWrapping.BeforeWhile`` has been added to allow wrapping
-  before the ```while`` in a do..while loop. By default the value is (``false``)
-
-  In previous releases ``IndentBraces`` implied ``BraceWrapping.BeforeWhile``.
-  If using a Custom BraceWrapping style you may need to now set
-  ``BraceWrapping.BeforeWhile`` to (``true``) to be explicit.
-
-  .. code-block:: c++
-
-      true:
-      do {
-        foo();
-      }
-      while(1);
-
-      false:
-      do {
-        foo();
-      } while(1);
+- ...
 
 libclang
 --------
diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 0a27810150db9..76676dfce95b4 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -127,6 +127,10 @@ Available checks are:
      is annotated with ``_Nonnull``.
   -  ``-fsanitize=nullability-return``: Returning null from a function with
      a return type annotated with ``_Nonnull``.
+  -  ``-fsanitize=objc-cast``: Invalid implicit cast of an ObjC object pointer
+     to an incompatible type. This is often unintentional, but is not undefined
+     behavior, therefore the check is not a part of the ``undefined`` group.
+     Currently only supported on Darwin.
   -  ``-fsanitize=object-size``: An attempt to potentially use bytes which
      the optimizer can determine are not part of the object being accessed.
      This will also detect some types of undefined behavior that may not
diff --git a/clang/docs/analyzer/conf.py b/clang/docs/analyzer/conf.py
index 6873ecc6c9c23..4fa65b2d0dae3 100644
--- a/clang/docs/analyzer/conf.py
+++ b/clang/docs/analyzer/conf.py
@@ -49,9 +49,9 @@
 # built documents.
 #
 # The short version.
-version = '11'
+version = '12'
 # The full version, including alpha/beta/rc tags.
-release = '11'
+release = '12'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/clang/docs/conf.py b/clang/docs/conf.py
index 164f15d1e2859..b63e829bfaf1d 100644
--- a/clang/docs/conf.py
+++ b/clang/docs/conf.py
@@ -50,9 +50,9 @@
 # built documents.
 #
 # The short version.
-version = '11'
+version = '12'
 # The full version, including alpha/beta/rc tags.
-release = '11'
+release = '12'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/clang/include/clang/AST/DeclOpenMP.h b/clang/include/clang/AST/DeclOpenMP.h
index 437feaba28fb7..154ecb977692c 100644
--- a/clang/include/clang/AST/DeclOpenMP.h
+++ b/clang/include/clang/AST/DeclOpenMP.h
@@ -129,7 +129,7 @@ class OMPDeclareReductionDecl final : public ValueDecl, public DeclContext {
   /// the declare reduction construct is declared inside compound statement.
   LazyDeclPtr PrevDeclInScope;
 
-  virtual void anchor();
+  void anchor() override;
 
   OMPDeclareReductionDecl(Kind DK, DeclContext *DC, SourceLocation L,
                           DeclarationName Name, QualType Ty,
@@ -228,7 +228,7 @@ class OMPDeclareMapperDecl final : public ValueDecl, public DeclContext {
 
   LazyDeclPtr PrevDeclInScope;
 
-  virtual void anchor();
+  void anchor() override;
 
   OMPDeclareMapperDecl(Kind DK, DeclContext *DC, SourceLocation L,
                        DeclarationName Name, QualType Ty,
diff --git a/clang/include/clang/AST/DependenceFlags.h b/clang/include/clang/AST/DependenceFlags.h
index 3601cb90bb765..14a7ffaecb2b0 100644
--- a/clang/include/clang/AST/DependenceFlags.h
+++ b/clang/include/clang/AST/DependenceFlags.h
@@ -16,8 +16,18 @@ namespace clang {
 struct ExprDependenceScope {
   enum ExprDependence : uint8_t {
     UnexpandedPack = 1,
+    // This expr depends in any way on
+    //   - a template parameter, it implies that the resolution of this expr may
+    //     cause instantiation to fail
+    //   - or an error (often in a non-template context)
+    //
+    // Note that the C++ standard doesn't define the instantiation-dependent term,
+    // we follow the formal definition coming from the Itanium C++ ABI, and
+    // extend it to errors.
     Instantiation = 2,
+    // The type of this expr depends on a template parameter, or an error.
     Type = 4,
+    // The value of this expr depends on a template parameter, or an error.
     Value = 8,
 
     // clang extension: this expr contains or references an error, and is
@@ -42,10 +52,14 @@ struct TypeDependenceScope {
     /// Whether this type contains an unexpanded parameter pack
     /// (for C++11 variadic templates)
     UnexpandedPack = 1,
-    /// Whether this type somehow involves a template parameter, even
-    /// if the resolution of the type does not depend on a template parameter.
+    /// Whether this type somehow involves
+    ///   - a template parameter, even if the resolution of the type does not
+    ///     depend on a template parameter.
+    ///   - or an error.
     Instantiation = 2,
-    /// Whether this type is a dependent type (C++ [temp.dep.type]).
+    /// Whether this type
+    ///   - is a dependent type (C++ [temp.dep.type])
+    ///   - or it somehow involves an error, e.g. decltype(recovery-expr)
     Dependent = 4,
     /// Whether this type is a variably-modified type (C99 6.7.5).
     VariablyModified = 8,
@@ -95,16 +109,17 @@ class Dependence {
 
     // Contains a template parameter pack that wasn't expanded.
     UnexpandedPack = 1,
-    // Uses a template parameter, even if it doesn't affect the result.
-    // Validity depends on the template parameter.
+    // Depends on a template parameter or an error in some way.
+    // Validity depends on how the template is instantiated or the error is
+    // resolved.
     Instantiation = 2,
-    // Expression type depends on template context.
+    // Expression type depends on template context, or an error.
     // Value and Instantiation should also be set.
     Type = 4,
-    // Expression value depends on template context.
+    // Expression value depends on template context, or an error.
     // Instantiation should also be set.
     Value = 8,
-    // Depends on template context.
+    // Depends on template context, or an error.
     // The type/value distinction is only meaningful for expressions.
     Dependent = Type | Value,
     // Includes an error, and depends on how it is resolved.
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 66eafaaab715e..c13b971192850 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -157,9 +157,11 @@ class Expr : public ValueStmt {
     return static_cast(ExprBits.Dependent);
   }
 
-  /// isValueDependent - Determines whether this expression is
-  /// value-dependent (C++ [temp.dep.constexpr]). For example, the
-  /// array bound of "Chars" in the following example is
+  /// Determines whether the value of this expression depends on
+  ///   - a template parameter (C++ [temp.dep.constexpr])
+  ///   - or an error, whose resolution is unknown
+  ///
+  /// For example, the array bound of "Chars" in the following example is
   /// value-dependent.
   /// @code
   /// template struct meta_string;
@@ -168,10 +170,12 @@ class Expr : public ValueStmt {
     return static_cast(getDependence() & ExprDependence::Value);
   }
 
-  /// isTypeDependent - Determines whether this expression is
-  /// type-dependent (C++ [temp.dep.expr]), which means that its type
-  /// could change from one template instantiation to the next. For
-  /// example, the expressions "x" and "x + y" are type-dependent in
+  /// Determines whether the type of this expression depends on
+  ///   - a template parameter (C++ [temp.dep.expr], which means that its type
+  ///     could change from one template instantiation to the next)
+  ///   - or an error
+  ///
+  /// For example, the expressions "x" and "x + y" are type-dependent in
   /// the following code, but "y" is not type-dependent:
   /// @code
   /// template
@@ -184,8 +188,10 @@ class Expr : public ValueStmt {
   }
 
   /// Whether this expression is instantiation-dependent, meaning that
-  /// it depends in some way on a template parameter, even if neither its type
-  /// nor (constant) value can change due to the template instantiation.
+  /// it depends in some way on
+  ///    - a template parameter (even if neither its type nor (constant) value
+  ///      can change due to the template instantiation)
+  ///    - or an error
   ///
   /// In the following example, the expression \c sizeof(sizeof(T() + T())) is
   /// instantiation-dependent (since it involves a template parameter \c T), but
@@ -200,6 +206,12 @@ class Expr : public ValueStmt {
   /// }
   /// \endcode
   ///
+  /// \code
+  /// void func(int) {
+  ///   func(); // the expression is instantiation-dependent, because it depends
+  ///           // on an error.
+  /// }
+  /// \endcode
   bool isInstantiationDependent() const {
     return static_cast(getDependence() & ExprDependence::Instantiation);
   }
@@ -6212,19 +6224,25 @@ class TypoExpr : public Expr {
 /// subexpressions of some expression that we could not construct and source
 /// range covered by the expression.
 ///
-/// By default, RecoveryExpr is type-, value- and instantiation-dependent to
-/// take advantage of existing machinery to deal with dependent code in C++,
-/// e.g. RecoveryExpr is preserved in `decltype()` as part of the
-/// `DependentDecltypeType`. In addition to that, clang does not report most
-/// errors on dependent expressions, so we get rid of bogus errors for free.
-/// However, note that unlike other dependent expressions, RecoveryExpr can be
-/// produced in non-template contexts.
-/// In addition, we will preserve the type in RecoveryExpr when the type is
-/// known, e.g. preserving the return type for a broken non-overloaded function
-/// call, a overloaded call where all candidates have the same return type.
+/// By default, RecoveryExpr uses dependence-bits to take advantage of existing
+/// machinery to deal with dependent code in C++, e.g. RecoveryExpr is preserved
+/// in `decltype()` as part of the `DependentDecltypeType`. In
+/// addition to that, clang does not report most errors on dependent
+/// expressions, so we get rid of bogus errors for free. However, note that
+/// unlike other dependent expressions, RecoveryExpr can be produced in
+/// non-template contexts.
+///
+/// We will preserve the type in RecoveryExpr when the type is known, e.g.
+/// preserving the return type for a broken non-overloaded function call, an
+/// overloaded call where all candidates have the same return type. In this
+/// case, the expression is not type-dependent (unless the known type is itself
+/// dependent)
 ///
 /// One can also reliably suppress all bogus errors on expressions containing
 /// recovery expressions by examining results of Expr::containsErrors().
+///
+/// FIXME: RecoveryExpr is currently generated by default in C++ mode only, as
+/// dependence isn't handled properly on several C-only codepaths.
 class RecoveryExpr final : public Expr,
                            private llvm::TrailingObjects {
 public:
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 178f4db770618..6f0b68479b9d7 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -1931,6 +1931,7 @@ class LambdaExpr final : public Expr,
 
   /// Const iterator that walks over the capture initialization
   /// arguments.
+  /// FIXME: This interface is prone to being used incorrectly.
   using const_capture_init_iterator = Expr *const *;
 
   /// Retrieve the initialization expressions for this lambda's captures.
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 6de7b6deb5149..291eeb942b071 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -4820,6 +4820,11 @@ class OMPMappableExprListClause : public OMPVarListClause,
   /// Total number of components in this clause.
   unsigned NumComponents;
 
+  /// Whether this clause is possible to have user-defined mappers associated.
+  /// It should be true for map, to, and from clauses, and false for
+  /// use_device_ptr and is_device_ptr.
+  const bool SupportsMapper;
+
   /// C++ nested name specifier for the associated user-defined mapper.
   NestedNameSpecifierLoc MapperQualifierLoc;
 
@@ -4840,19 +4845,21 @@ class OMPMappableExprListClause : public OMPVarListClause,
   /// NumUniqueDeclarations: number of unique base declarations in this clause;
   /// 3) NumComponentLists: number of component lists in this clause; and 4)
   /// NumComponents: total number of expression components in the clause.
+  /// \param SupportsMapper Indicates whether this clause is possible to have
+  /// user-defined mappers associated.
   /// \param MapperQualifierLocPtr C++ nested name specifier for the associated
   /// user-defined mapper.
   /// \param MapperIdInfoPtr The identifier of associated user-defined mapper.
   OMPMappableExprListClause(
       OpenMPClauseKind K, const OMPVarListLocTy &Locs,
-      const OMPMappableExprListSizeTy &Sizes,
+      const OMPMappableExprListSizeTy &Sizes, bool SupportsMapper = false,
       NestedNameSpecifierLoc *MapperQualifierLocPtr = nullptr,
       DeclarationNameInfo *MapperIdInfoPtr = nullptr)
       : OMPVarListClause(K, Locs.StartLoc, Locs.LParenLoc, Locs.EndLoc,
                             Sizes.NumVars),
         NumUniqueDeclarations(Sizes.NumUniqueDeclarations),
         NumComponentLists(Sizes.NumComponentLists),
-        NumComponents(Sizes.NumComponents) {
+        NumComponents(Sizes.NumComponents), SupportsMapper(SupportsMapper) {
     if (MapperQualifierLocPtr)
       MapperQualifierLoc = *MapperQualifierLocPtr;
     if (MapperIdInfoPtr)
@@ -5051,6 +5058,8 @@ class OMPMappableExprListClause : public OMPVarListClause,
   /// Get the user-defined mapper references that are in the trailing objects of
   /// the class.
   MutableArrayRef getUDMapperRefs() {
+    assert(SupportsMapper &&
+           "Must be a clause that is possible to have user-defined mappers");
     return llvm::makeMutableArrayRef(
         static_cast(this)->template getTrailingObjects() +
             OMPVarListClause::varlist_size(),
@@ -5060,8 +5069,10 @@ class OMPMappableExprListClause : public OMPVarListClause,
   /// Get the user-defined mappers references that are in the trailing objects
   /// of the class.
   ArrayRef getUDMapperRefs() const {
+    assert(SupportsMapper &&
+           "Must be a clause that is possible to have user-defined mappers");
     return llvm::makeArrayRef(
-        static_cast(this)->template getTrailingObjects() +
+        static_cast(this)->template getTrailingObjects() +
             OMPVarListClause::varlist_size(),
         OMPVarListClause::varlist_size());
   }
@@ -5071,6 +5082,8 @@ class OMPMappableExprListClause : public OMPVarListClause,
   void setUDMapperRefs(ArrayRef DMDs) {
     assert(DMDs.size() == OMPVarListClause::varlist_size() &&
            "Unexpected number of user-defined mappers.");
+    assert(SupportsMapper &&
+           "Must be a clause that is possible to have user-defined mappers");
     std::copy(DMDs.begin(), DMDs.end(), getUDMapperRefs().begin());
   }
 
@@ -5107,6 +5120,12 @@ class OMPMappableExprListClause : public OMPVarListClause,
     // The list number associated with the current declaration.
     ArrayRef::iterator NumListsCur;
 
+    // Whether this clause is possible to have user-defined mappers associated.
+    const bool SupportsMapper;
+
+    // The user-defined mapper associated with the current declaration.
+    ArrayRef::iterator MapperCur;
+
     // Remaining lists for the current declaration.
     unsigned RemainingLists = 0;
 
@@ -5127,16 +5146,20 @@ class OMPMappableExprListClause : public OMPVarListClause,
     explicit const_component_lists_iterator(
         ArrayRef UniqueDecls, ArrayRef DeclsListNum,
         ArrayRef CumulativeListSizes,
-        MappableExprComponentListRef Components)
+        MappableExprComponentListRef Components, bool SupportsMapper,
+        ArrayRef Mappers)
         : const_component_lists_iterator::iterator_adaptor_base(
               Components.begin()),
           DeclCur(UniqueDecls.begin()), NumListsCur(DeclsListNum.begin()),
+          SupportsMapper(SupportsMapper),
           ListSizeCur(CumulativeListSizes.begin()),
           ListSizeEnd(CumulativeListSizes.end()), End(Components.end()) {
       assert(UniqueDecls.size() == DeclsListNum.size() &&
              "Inconsistent number of declarations and list sizes!");
       if (!DeclsListNum.empty())
         RemainingLists = *NumListsCur;
+      if (SupportsMapper)
+        MapperCur = Mappers.begin();
     }
 
     /// Construct an iterator that scan lists for a given declaration \a
@@ -5144,9 +5167,11 @@ class OMPMappableExprListClause : public OMPVarListClause,
     explicit const_component_lists_iterator(
         const ValueDecl *Declaration, ArrayRef UniqueDecls,
         ArrayRef DeclsListNum, ArrayRef CumulativeListSizes,
-        MappableExprComponentListRef Components)
+        MappableExprComponentListRef Components, bool SupportsMapper,
+        ArrayRef Mappers)
         : const_component_lists_iterator(UniqueDecls, DeclsListNum,
-                                         CumulativeListSizes, Components) {
+                                         CumulativeListSizes, Components,
+                                         SupportsMapper, Mappers) {
       // Look for the desired declaration. While we are looking for it, we
       // update the state so that we know the component where a given list
       // starts.
@@ -5161,6 +5186,9 @@ class OMPMappableExprListClause : public OMPVarListClause,
         std::advance(ListSizeCur, *NumListsCur - 1);
         PrevListSize = *ListSizeCur;
         ++ListSizeCur;
+
+        if (SupportsMapper)
+          ++MapperCur;
       }
 
       // If we didn't find any declaration, advance the iterator to after the
@@ -5186,14 +5214,20 @@ class OMPMappableExprListClause : public OMPVarListClause,
 
     // Return the array with the current list. The sizes are cumulative, so the
     // array size is the difference between the current size and previous one.
-    std::pair
+    std::tuple
     operator*() const {
       assert(ListSizeCur != ListSizeEnd && "Invalid iterator!");
-      return std::make_pair(
+      const ValueDecl *Mapper = nullptr;
+      if (SupportsMapper && *MapperCur)
+        Mapper = cast(cast(*MapperCur)->getDecl());
+      return std::make_tuple(
           *DeclCur,
-          MappableExprComponentListRef(&*this->I, *ListSizeCur - PrevListSize));
+          MappableExprComponentListRef(&*this->I, *ListSizeCur - PrevListSize),
+          Mapper);
     }
-    std::pair
+    std::tuple
     operator->() const {
       return **this;
     }
@@ -5216,6 +5250,8 @@ class OMPMappableExprListClause : public OMPVarListClause,
         if (!(--RemainingLists)) {
           ++DeclCur;
           ++NumListsCur;
+          if (SupportsMapper)
+            ++MapperCur;
           RemainingLists = *NumListsCur;
           assert(RemainingLists && "No lists in the following declaration??");
         }
@@ -5233,13 +5269,15 @@ class OMPMappableExprListClause : public OMPVarListClause,
   const_component_lists_iterator component_lists_begin() const {
     return const_component_lists_iterator(
         getUniqueDeclsRef(), getDeclNumListsRef(), getComponentListSizesRef(),
-        getComponentsRef());
+        getComponentsRef(), SupportsMapper,
+        SupportsMapper ? getUDMapperRefs() : llvm::None);
   }
   const_component_lists_iterator component_lists_end() const {
     return const_component_lists_iterator(
         ArrayRef(), ArrayRef(), ArrayRef(),
         MappableExprComponentListRef(getComponentsRef().end(),
-                                     getComponentsRef().end()));
+                                     getComponentsRef().end()),
+        SupportsMapper, llvm::None);
   }
   const_component_lists_range component_lists() const {
     return {component_lists_begin(), component_lists_end()};
@@ -5251,7 +5289,8 @@ class OMPMappableExprListClause : public OMPVarListClause,
   decl_component_lists_begin(const ValueDecl *VD) const {
     return const_component_lists_iterator(
         VD, getUniqueDeclsRef(), getDeclNumListsRef(),
-        getComponentListSizesRef(), getComponentsRef());
+        getComponentListSizesRef(), getComponentsRef(), SupportsMapper,
+        SupportsMapper ? getUDMapperRefs() : llvm::None);
   }
   const_component_lists_iterator decl_component_lists_end() const {
     return component_lists_end();
@@ -5399,7 +5438,8 @@ class OMPMapClause final : public OMPMappableExprListClause,
                         SourceLocation MapLoc, const OMPVarListLocTy &Locs,
                         const OMPMappableExprListSizeTy &Sizes)
       : OMPMappableExprListClause(llvm::omp::OMPC_map, Locs, Sizes,
-                                  &MapperQualifierLoc, &MapperIdInfo),
+                                  /*SupportsMapper=*/true, &MapperQualifierLoc,
+                                  &MapperIdInfo),
         MapType(MapType), MapTypeIsImplicit(MapTypeIsImplicit), MapLoc(MapLoc) {
     assert(llvm::array_lengthof(MapTypeModifiers) == MapModifiers.size() &&
            "Unexpected number of map type modifiers.");
@@ -5419,8 +5459,8 @@ class OMPMapClause final : public OMPMappableExprListClause,
   /// 3) NumComponentLists: number of component lists in this clause; and 4)
   /// NumComponents: total number of expression components in the clause.
   explicit OMPMapClause(const OMPMappableExprListSizeTy &Sizes)
-      : OMPMappableExprListClause(llvm::omp::OMPC_map, OMPVarListLocTy(),
-                                  Sizes) {}
+      : OMPMappableExprListClause(llvm::omp::OMPC_map, OMPVarListLocTy(), Sizes,
+                                  /*SupportsMapper=*/true) {}
 
   /// Set map-type-modifier for the clause.
   ///
@@ -6307,7 +6347,8 @@ class OMPToClause final : public OMPMappableExprListClause,
                        const OMPVarListLocTy &Locs,
                        const OMPMappableExprListSizeTy &Sizes)
       : OMPMappableExprListClause(llvm::omp::OMPC_to, Locs, Sizes,
-                                  &MapperQualifierLoc, &MapperIdInfo) {}
+                                  /*SupportsMapper=*/true, &MapperQualifierLoc,
+                                  &MapperIdInfo) {}
 
   /// Build an empty clause.
   ///
@@ -6317,8 +6358,8 @@ class OMPToClause final : public OMPMappableExprListClause,
   /// 3) NumComponentLists: number of component lists in this clause; and 4)
   /// NumComponents: total number of expression components in the clause.
   explicit OMPToClause(const OMPMappableExprListSizeTy &Sizes)
-      : OMPMappableExprListClause(llvm::omp::OMPC_to, OMPVarListLocTy(),
-                                  Sizes) {}
+      : OMPMappableExprListClause(llvm::omp::OMPC_to, OMPVarListLocTy(), Sizes,
+                                  /*SupportsMapper=*/true) {}
 
   /// Define the sizes of each trailing object array except the last one. This
   /// is required for TrailingObjects to work properly.
@@ -6426,7 +6467,8 @@ class OMPFromClause final
                          const OMPVarListLocTy &Locs,
                          const OMPMappableExprListSizeTy &Sizes)
       : OMPMappableExprListClause(llvm::omp::OMPC_from, Locs, Sizes,
-                                  &MapperQualifierLoc, &MapperIdInfo) {}
+                                  /*SupportsMapper=*/true, &MapperQualifierLoc,
+                                  &MapperIdInfo) {}
 
   /// Build an empty clause.
   ///
@@ -6437,7 +6479,7 @@ class OMPFromClause final
   /// NumComponents: total number of expression components in the clause.
   explicit OMPFromClause(const OMPMappableExprListSizeTy &Sizes)
       : OMPMappableExprListClause(llvm::omp::OMPC_from, OMPVarListLocTy(),
-                                  Sizes) {}
+                                  Sizes, /*SupportsMapper=*/true) {}
 
   /// Define the sizes of each trailing object array except the last one. This
   /// is required for TrailingObjects to work properly.
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 1aaa127760990..fe88753c0063d 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1938,6 +1938,11 @@ class alignas(8) Type : public ExtQualsTypeCommonBase {
   bool isSizelessType() const;
   bool isSizelessBuiltinType() const;
 
+  /// Determines if this is a sizeless type supported by the
+  /// 'arm_sve_vector_bits' type attribute, which can be applied to a single
+  /// SVE vector or predicate, excluding tuple types such as svint32x4_t.
+  bool isVLSTBuiltinType() const;
+
   /// Types are partitioned into 3 broad categories (C99 6.2.5p1):
   /// object types, function types, and incomplete types.
 
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index f16fb876cdd38..643419743a119 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -7190,10 +7190,12 @@ AST_MATCHER_P(OMPExecutableDirective, hasAnyClause,
 /// \code
 ///   #pragma omp parallel default(none)
 ///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
 ///   #pragma omp parallel
 /// \endcode
 ///
-/// ``ompDefaultClause()`` matches ``default(none)`` and ``default(shared)``.
+/// ``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, and
+/// ``default(firstprivate)``.
 extern const internal::VariadicDynCastAllOfMatcher
     ompDefaultClause;
 
@@ -7205,6 +7207,7 @@ extern const internal::VariadicDynCastAllOfMatcher
 ///   #pragma omp parallel
 ///   #pragma omp parallel default(none)
 ///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
 /// \endcode
 ///
 /// ``ompDefaultClause(isNoneKind())`` matches only ``default(none)``.
@@ -7220,6 +7223,7 @@ AST_MATCHER(OMPDefaultClause, isNoneKind) {
 ///   #pragma omp parallel
 ///   #pragma omp parallel default(none)
 ///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
 /// \endcode
 ///
 /// ``ompDefaultClause(isSharedKind())`` matches only ``default(shared)``.
@@ -7227,6 +7231,24 @@ AST_MATCHER(OMPDefaultClause, isSharedKind) {
   return Node.getDefaultKind() == llvm::omp::OMP_DEFAULT_shared;
 }
 
+/// Matches if the OpenMP ``default`` clause has ``firstprivate`` kind
+/// specified.
+///
+/// Given
+///
+/// \code
+///   #pragma omp parallel
+///   #pragma omp parallel default(none)
+///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
+/// \endcode
+///
+/// ``ompDefaultClause(isFirstPrivateKind())`` matches only
+/// ``default(firstprivate)``.
+AST_MATCHER(OMPDefaultClause, isFirstPrivateKind) {
+  return Node.getDefaultKind() == llvm::omp::OMP_DEFAULT_firstprivate;
+}
+
 /// Matches if the OpenMP directive is allowed to contain the specified OpenMP
 /// clause kind.
 ///
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 5a607dbc2396d..cfa980f14b67a 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2022,6 +2022,12 @@ def NeonVectorType : TypeAttr {
   let ASTNode = 0;
 }
 
+def ArmSveVectorBits : TypeAttr {
+  let Spellings = [GNU<"arm_sve_vector_bits">];
+  let Args = [IntArgument<"NumBits">];
+  let Documentation = [ArmSveVectorBitsDocs];
+}
+
 def ArmMveStrictPolymorphism : TypeAttr, TargetSpecificAttr {
   let Spellings = [Clang<"__clang_arm_mve_strict_polymorphism">];
   let Documentation = [ArmMveStrictPolymorphismDocs];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index faf956c13f3e8..0812e3476d5d5 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5329,6 +5329,43 @@ close the handle. It is also assumed to require an open handle to work with.
   }];
 }
 
+def ArmSveVectorBitsDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+The ``arm_sve_vector_bits(N)`` attribute is defined by the Arm C Language
+Extensions (ACLE) for SVE. It is used to define fixed-length (VLST) variants of
+sizeless types (VLAT).
+
+For example:
+
+.. code-block:: c
+
+  #include 
+
+  #if __ARM_FEATURE_SVE_BITS==512
+  typedef svint32_t fixed_svint32_t __attribute__((arm_sve_vector_bits(512)));
+  #endif
+
+Creates a type ``fixed_svint32_t`` that is a fixed-length variant of
+``svint32_t`` that contains exactly 512-bits. Unlike ``svint32_t``, this type
+can be used in globals, structs, unions, and arrays, all of which are
+unsupported for sizeless types.
+
+The attribute can be attached to a single SVE vector (such as ``svint32_t``) or
+to the SVE predicate type ``svbool_t``; this excludes tuple types such as
+``svint32x4_t``. The behavior of the attribute is undefined unless
+``N==__ARM_FEATURE_SVE_BITS``, the implementation defined feature macro that is
+enabled under the ``-msve-vector-bits`` flag.
+
+NOTE: This feature is currently WIP, the ``-msve-vector-bits=`` flag defines
+the ``__ARM_FEATURE_SVE_BITS_EXPERIMENTAL`` macro. The feature will be
+considered complete when this experimental macro is removed.
+
+See `Arm C Language Extensions for SVE
+`_ for more information.
+}];
+}
+
 def ArmMveStrictPolymorphismDocs : Documentation {
     let Category = DocCatType;
     let Content = [{
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index d0df5fcd15523..5d445c253a855 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -321,12 +321,12 @@ BUILTIN(__builtin_altivec_vsldbi, "V16UcV16UcV16UcIi", "")
 BUILTIN(__builtin_altivec_vsrdbi, "V16UcV16UcV16UcIi", "")
 
 // P10 Vector Insert built-ins.
-BUILTIN(__builtin_altivec_vinsblx, "V16UcV16UcULLiULLi", "")
-BUILTIN(__builtin_altivec_vinsbrx, "V16UcV16UcULLiULLi", "")
-BUILTIN(__builtin_altivec_vinshlx, "V8UsV8UsULLiULLi", "")
-BUILTIN(__builtin_altivec_vinshrx, "V8UsV8UsULLiULLi", "")
-BUILTIN(__builtin_altivec_vinswlx, "V4UiV4UiULLiULLi", "")
-BUILTIN(__builtin_altivec_vinswrx, "V4UiV4UiULLiULLi", "")
+BUILTIN(__builtin_altivec_vinsblx, "V16UcV16UcUiUi", "")
+BUILTIN(__builtin_altivec_vinsbrx, "V16UcV16UcUiUi", "")
+BUILTIN(__builtin_altivec_vinshlx, "V8UsV8UsUiUi", "")
+BUILTIN(__builtin_altivec_vinshrx, "V8UsV8UsUiUi", "")
+BUILTIN(__builtin_altivec_vinswlx, "V4UiV4UiUiUi", "")
+BUILTIN(__builtin_altivec_vinswrx, "V4UiV4UiUiUi", "")
 BUILTIN(__builtin_altivec_vinsdlx, "V2ULLiV2ULLiULLiULLi", "")
 BUILTIN(__builtin_altivec_vinsdrx, "V2ULLiV2ULLiULLiULLi", "")
 BUILTIN(__builtin_altivec_vinsbvlx, "V16UcV16UcULLiV16Uc", "")
@@ -467,6 +467,8 @@ BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
 
 BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "")
 
+BUILTIN(__builtin_vsx_xvtlsbb, "iV16Ucb", "")
+
 // P10 Vector Permute Extended built-in.
 BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")
 
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 142edb8ab3857..ccfa91cb7f2b0 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -533,4 +533,7 @@ def warn_drv_libstdcxx_not_found : Warning<
 def err_drv_cannot_mix_options : Error<"cannot specify '%1' along with '%0'">;
 
 def err_drv_invalid_object_mode : Error<"OBJECT_MODE setting %0 is not recognized and is not a valid setting.">;
+
+def err_drv_invalid_sve_vector_bits : Error<
+  "'-msve-vector-bits' is not supported without SVE enabled">;
 }
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 6becf7cda1626..192a62b0035b4 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -280,9 +280,12 @@ def CXX98CompatPedantic : DiagGroup<"c++98-compat-pedantic",
 
 def CXX11Narrowing : DiagGroup<"c++11-narrowing">;
 
-def CXX11WarnOverrideDestructor :
+def CXX11WarnInconsistentOverrideDestructor :
   DiagGroup<"inconsistent-missing-destructor-override">;
-def CXX11WarnOverrideMethod : DiagGroup<"inconsistent-missing-override">;
+def CXX11WarnInconsistentOverrideMethod :
+  DiagGroup<"inconsistent-missing-override">;
+def CXX11WarnSuggestOverrideDestructor : DiagGroup<"suggest-destructor-override">;
+def CXX11WarnSuggestOverride : DiagGroup<"suggest-override">;
 
 // Original name of this warning in Clang
 def : DiagGroup<"c++0x-narrowing", [CXX11Narrowing]>;
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 4c314f90e270e..3ed6518d23aca 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -934,6 +934,8 @@ def err_lambda_capture_misplaced_ellipsis : Error<
   "the name of the capture">;
 def err_lambda_capture_multiple_ellipses : Error<
   "multiple ellipses in pack capture">;
+def err_capture_default_first : Error<
+  "capture default must be first">;
 // C++17 lambda expressions
 def err_expected_star_this_capture : Error<
   "expected 'this' following '*' in lambda capture list">;
@@ -1334,6 +1336,8 @@ def warn_omp_more_one_device_type_clause
       InGroup;
 def err_omp_variant_ctx_second_match_extension : Error<
   "only a single match extension allowed per OpenMP context selector">;
+def err_omp_invalid_dsa: Error<
+  "data-sharing attribute '%0' in '%1' clause requires OpenMP version %2 or above">;
 
 // Pragma loop support.
 def err_pragma_loop_missing_argument : Error<
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9157e7e9d2442..9c4fc3ab93b59 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2398,12 +2398,22 @@ def override_keyword_hides_virtual_member_function : Error<
   "%select{function|functions}1">;
 def err_function_marked_override_not_overriding : Error<
   "%0 marked 'override' but does not override any member functions">;
-def warn_destructor_marked_not_override_overriding : Warning <
-  "%0 overrides a destructor but is not marked 'override'">,
-  InGroup, DefaultIgnore;
-def warn_function_marked_not_override_overriding : Warning <
-  "%0 overrides a member function but is not marked 'override'">,
-  InGroup;
+def warn_destructor_marked_not_override_overriding : TextSubstitution <
+  "%0 overrides a destructor but is not marked 'override'">;
+def warn_function_marked_not_override_overriding : TextSubstitution <
+  "%0 overrides a member function but is not marked 'override'">;
+def warn_inconsistent_destructor_marked_not_override_overriding : Warning <
+  "%sub{warn_destructor_marked_not_override_overriding}0">,
+  InGroup, DefaultIgnore;
+def warn_inconsistent_function_marked_not_override_overriding : Warning <
+  "%sub{warn_function_marked_not_override_overriding}0">,
+  InGroup;
+def warn_suggest_destructor_marked_not_override_overriding : Warning <
+  "%sub{warn_destructor_marked_not_override_overriding}0">,
+  InGroup, DefaultIgnore;
+def warn_suggest_function_marked_not_override_overriding : Warning <
+  "%sub{warn_function_marked_not_override_overriding}0">,
+  InGroup, DefaultIgnore;
 def err_class_marked_final_used_as_base : Error<
   "base %0 is marked '%select{final|sealed}1'">;
 def warn_abstract_final_class : Warning<
@@ -2837,6 +2847,13 @@ def err_attribute_invalid_vector_type : Error<"invalid vector element type %0">;
 def err_attribute_invalid_matrix_type : Error<"invalid matrix element type %0">;
 def err_attribute_bad_neon_vector_size : Error<
   "Neon vector size must be 64 or 128 bits">;
+def err_attribute_invalid_sve_type : Error<
+  "%0 attribute applied to non-SVE type %1">;
+def err_attribute_bad_sve_vector_size : Error<
+  "invalid SVE vector size '%0', must match value set by "
+  "'-msve-vector-bits' ('%1')">;
+def err_attribute_arm_feature_sve_bits_unsupported : Error<
+  "%0 is not supported when '-msve-vector-bits=' is not specified">;
 def err_attribute_requires_positive_integer : Error<
   "%0 attribute requires a %select{positive|non-negative}1 "
   "integral compile time constant expression">;
@@ -7970,6 +7987,8 @@ def err_atomic_builtin_pointer_size : Error<
 def err_atomic_exclusive_builtin_pointer_size : Error<
   "address argument to load or store exclusive builtin must be a pointer to"
   " 1,2,4 or 8 byte type (%0 invalid)">;
+def err_atomic_builtin_ext_int_size : Error<
+  "Atomic memory operand must have a power-of-two size">;
 def err_atomic_op_needs_atomic : Error<
   "address argument to atomic operation must be a pointer to _Atomic "
   "type (%0 invalid)">;
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 67153c36d10f5..55ee5c7aabbd6 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -388,6 +388,8 @@ LANGOPT(SpeculativeLoadHardening, 1, 0, "Speculative load hardening enabled")
 LANGOPT(RelativeCXXABIVTables, 1, 0,
         "Use an ABI-incompatible v-table layout that uses relative references")
 
+LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits")
+
 #undef LANGOPT
 #undef COMPATIBLE_LANGOPT
 #undef BENIGN_LANGOPT
diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def
index 0037cc2146f26..2912bdd44b2db 100644
--- a/clang/include/clang/Basic/Sanitizers.def
+++ b/clang/include/clang/Basic/Sanitizers.def
@@ -156,6 +156,8 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change",
                 ImplicitIntegerArithmeticValueChange,
                 ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation)
 
+SANITIZER("objc-cast", ObjCCast)
+
 // FIXME:
 //SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion,
 //                ImplicitIntegerArithmeticValueChange |
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index eadc77e0373e6..b4e7cbf987c9c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2253,6 +2253,7 @@ def municode : Joined<["-"], "municode">, Group, Flags<[DriverOption]>;
 def mthreads : Joined<["-"], "mthreads">, Group, Flags<[DriverOption]>;
 def mcpu_EQ : Joined<["-"], "mcpu=">, Group;
 def mmcu_EQ : Joined<["-"], "mmcu=">, Group;
+def msim : Flag<["-"], "msim">, Group;
 def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group;
 def mfix_and_continue : Flag<["-"], "mfix-and-continue">, Group;
 def mieee_fp : Flag<["-"], "mieee-fp">, Group;
@@ -2338,9 +2339,9 @@ def m_seses : Flag<["-"], "mseses">, Group, Flags<[CoreOption, DriverOp
 def mno_seses : Flag<["-"], "mno-seses">, Group, Flags<[CoreOption, DriverOption]>,
   HelpText<"Disable speculative execution side effect suppression (SESES)">;
 
-def mrelax : Flag<["-"], "mrelax">, Group,
+def mrelax : Flag<["-"], "mrelax">, Group,
   HelpText<"Enable linker relaxation">;
-def mno_relax : Flag<["-"], "mno-relax">, Group,
+def mno_relax : Flag<["-"], "mno-relax">, Group,
   HelpText<"Disable linker relaxation">;
 def msmall_data_limit_EQ : Joined<["-"], "msmall-data-limit=">, Group,
   Alias,
@@ -2403,6 +2404,11 @@ foreach i = {8-15,18} in
   def fcall_saved_x#i : Flag<["-"], "fcall-saved-x"#i>, Group,
     HelpText<"Make the x"#i#" register call-saved (AArch64 only)">;
 
+def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">,
+  Group, Flags<[DriverOption,CC1Option]>,
+  HelpText<"Set the size of fixed-length SVE vectors in bits.">,
+  Values<"128,256,512,1024,2048">;
+
 def msign_return_address_EQ : Joined<["-"], "msign-return-address=">,
   Flags<[CC1Option]>, Group, Values<"none,all,non-leaf">,
   HelpText<"Select return address signing scope">;
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 3549ec9eee0e5..7201c11f1158e 100755
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -1425,15 +1425,20 @@ struct FormatStyle {
   /// For example: TESTSUITE
   std::vector NamespaceMacros;
 
-  /// A vector of macros which are whitespace-sensitive and shouldn't be
-  /// touched.
+  /// A vector of macros which are whitespace-sensitive and should not
+  /// be touched.
   ///
   /// These are expected to be macros of the form:
   /// \code
   ///   STRINGIZE(...)
   /// \endcode
   ///
-  /// For example: STRINGIZE
+  /// In the .clang-format configuration file, this can be configured like:
+  /// \code{.yaml}
+  ///   WhitespaceSensitiveMacros: ['STRINGIZE', 'PP_STRINGIZE']
+  /// \endcode
+  ///
+  /// For example: BOOST_PP_STRINGIZE
   std::vector WhitespaceSensitiveMacros;
 
   tooling::IncludeStyle IncludeStyle;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ff03b2d8207b4..3c7396a5fd5d8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -7120,7 +7120,7 @@ class Sema final {
 
   /// DiagnoseAbsenceOfOverrideControl - Diagnose if 'override' keyword was
   /// not used in the declaration of an overriding method.
-  void DiagnoseAbsenceOfOverrideControl(NamedDecl *D);
+  void DiagnoseAbsenceOfOverrideControl(NamedDecl *D, bool Inconsistent);
 
   /// CheckForFunctionMarkedFinal - Checks whether a virtual member function
   /// overrides a virtual member function marked 'final', according to
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
index d75f9f63286db..a2a98c558a4b7 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
@@ -554,7 +554,7 @@ class SimpleFunctionCall : public AnyFunctionCall {
 
 /// Represents a call to a block.
 ///
-/// Example: ^{ /* ... */ }()
+/// Example: ^{ statement-body }()
 class BlockCall : public CallEvent {
   friend class CallEventManager;
 
diff --git a/clang/include/clang/Testing/TestClangConfig.h b/clang/include/clang/Testing/TestClangConfig.h
index eefa36dc2ebb9..5d6be4f65d0ad 100644
--- a/clang/include/clang/Testing/TestClangConfig.h
+++ b/clang/include/clang/Testing/TestClangConfig.h
@@ -51,6 +51,8 @@ struct TestClangConfig {
     return Language == Lang_CXX17 || Language == Lang_CXX20;
   }
 
+  bool isCXX20OrLater() const { return Language == Lang_CXX20; }
+
   bool supportsCXXDynamicExceptionSpecification() const {
     return Language == Lang_CXX03 || Language == Lang_CXX11 ||
            Language == Lang_CXX14;
diff --git a/clang/lib/ARCMigrate/CMakeLists.txt b/clang/lib/ARCMigrate/CMakeLists.txt
index 6f19bea476daa..1d5a185c3b6a7 100644
--- a/clang/lib/ARCMigrate/CMakeLists.txt
+++ b/clang/lib/ARCMigrate/CMakeLists.txt
@@ -2,6 +2,12 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
+# By default MSVC has a 2^16 limit on the number of sections in an object
+# file, and Transforms.cpp needs more than that.
+if (MSVC)
+  set_source_files_properties(Transforms.cpp PROPERTIES COMPILE_FLAGS /bigobj)
+endif()
+
 add_clang_library(clangARCMigrate
   ARCMT.cpp
   ARCMTActions.cpp
diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp
index 53c43b194b38c..2333993dbeb40 100644
--- a/clang/lib/AST/ComputeDependence.cpp
+++ b/clang/lib/AST/ComputeDependence.cpp
@@ -495,13 +495,16 @@ ExprDependence clang::computeDependence(DeclRefExpr *E, const ASTContext &Ctx) {
 }
 
 ExprDependence clang::computeDependence(RecoveryExpr *E) {
-  // Mark the expression as value- and instantiation- dependent to reuse
-  // existing suppressions for dependent code, e.g. avoiding
-  // constant-evaluation.
-  // FIXME: drop type+value+instantiation once Error is sufficient to suppress
-  // bogus dianostics.
+  // RecoveryExpr is
+  //   - always value-dependent, and therefore instantiation dependent
+  //   - contains errors (ExprDependence::Error), by definition
+  //   - type-dependent if we don't know the type (fallback to an opaque
+  //     dependent type), or the type is known and dependent, or it has
+  //     type-dependent subexpressions.
   auto D = toExprDependence(E->getType()->getDependence()) |
            ExprDependence::ValueInstantiation | ExprDependence::Error;
+  // FIXME: remove the type-dependent bit from subexpressions, if the
+  // RecoveryExpr has a non-dependent type.
   for (auto *S : E->subExpressions())
     D |= S->getDependence();
   return D;
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 343a271c33944..399e7e13c4459 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -3629,7 +3629,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   case LambdaExprClass: {
     const LambdaExpr *LE = cast(this);
     for (Expr *E : LE->capture_inits())
-      if (E->HasSideEffects(Ctx, IncludePossibleEffects))
+      if (E && E->HasSideEffects(Ctx, IncludePossibleEffects))
         return true;
     return false;
   }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9808a248c95fb..1e313647aa2e5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -9929,8 +9929,17 @@ namespace {
     bool ZeroInitialization(const Expr *E) {
       const ConstantArrayType *CAT =
           Info.Ctx.getAsConstantArrayType(E->getType());
-      if (!CAT)
+      if (!CAT) {
+        if (E->getType()->isIncompleteArrayType()) {
+          // We can be asked to zero-initialize a flexible array member; this
+          // is represented as an ImplicitValueInitExpr of incomplete array
+          // type. In this case, the array has zero elements.
+          Result = APValue(APValue::UninitArray(), 0, 0);
+          return true;
+        }
+        // FIXME: We could handle VLAs here.
         return Error(E);
+      }
 
       Result = APValue(APValue::UninitArray(), 0,
                        CAT->getSize().getZExtValue());
diff --git a/clang/lib/AST/Interp/InterpFrame.h b/clang/lib/AST/Interp/InterpFrame.h
index b8391b0bcf92c..304e2ad66537b 100644
--- a/clang/lib/AST/Interp/InterpFrame.h
+++ b/clang/lib/AST/Interp/InterpFrame.h
@@ -45,16 +45,16 @@ class InterpFrame final : public Frame {
   void popArgs();
 
   /// Describes the frame with arguments for diagnostic purposes.
-  void describe(llvm::raw_ostream &OS);
+  void describe(llvm::raw_ostream &OS) override;
 
   /// Returns the parent frame object.
-  Frame *getCaller() const;
+  Frame *getCaller() const override;
 
   /// Returns the location of the call to the frame.
-  SourceLocation getCallLocation() const;
+  SourceLocation getCallLocation() const override;
 
   /// Returns the caller.
-  const FunctionDecl *getCallee() const;
+  const FunctionDecl *getCallee() const override;
 
   /// Returns the current function.
   Function *getFunction() const { return Func; }
diff --git a/clang/lib/AST/OSLog.cpp b/clang/lib/AST/OSLog.cpp
index df2f808728cfb..094c0102854b1 100644
--- a/clang/lib/AST/OSLog.cpp
+++ b/clang/lib/AST/OSLog.cpp
@@ -55,9 +55,9 @@ class OSLogFormatStringHandler
     ArgsData.reserve(Args.size());
   }
 
-  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
-                                     const char *StartSpecifier,
-                                     unsigned SpecifierLen) {
+  bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                             const char *StartSpecifier,
+                             unsigned SpecifierLen) override {
     if (!FS.consumesDataArgument() &&
         FS.getConversionSpecifier().getKind() !=
             clang::analyze_format_string::ConversionSpecifier::PrintErrno)
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index f797f5fe8e6d1..ea160025ae3dc 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -2005,8 +2005,23 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) {
     if (C->isPackExpansion())
       OS << "...";
 
-    if (Node->isInitCapture(C))
-      PrintExpr(C->getCapturedVar()->getInit());
+    if (Node->isInitCapture(C)) {
+      VarDecl *D = C->getCapturedVar();
+
+      llvm::StringRef Pre;
+      llvm::StringRef Post;
+      if (D->getInitStyle() == VarDecl::CallInit &&
+          !isa(D->getInit())) {
+        Pre = "(";
+        Post = ")";
+      } else if (D->getInitStyle() == VarDecl::CInit) {
+        Pre = " = ";
+      }
+
+      OS << Pre;
+      PrintExpr(D->getInit());
+      OS << Post;
+    }
   }
   OS << ']';
 
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index db1d8226b770d..baf335ab9e546 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2294,6 +2294,30 @@ bool Type::isSizelessBuiltinType() const {
 
 bool Type::isSizelessType() const { return isSizelessBuiltinType(); }
 
+bool Type::isVLSTBuiltinType() const {
+  if (const BuiltinType *BT = getAs()) {
+    switch (BT->getKind()) {
+    case BuiltinType::SveInt8:
+    case BuiltinType::SveInt16:
+    case BuiltinType::SveInt32:
+    case BuiltinType::SveInt64:
+    case BuiltinType::SveUint8:
+    case BuiltinType::SveUint16:
+    case BuiltinType::SveUint32:
+    case BuiltinType::SveUint64:
+    case BuiltinType::SveFloat16:
+    case BuiltinType::SveFloat32:
+    case BuiltinType::SveFloat64:
+    case BuiltinType::SveBFloat16:
+    case BuiltinType::SveBool:
+      return true;
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
 bool QualType::isPODType(const ASTContext &Context) const {
   // C++11 has a more relaxed definition of POD.
   if (Context.getLangOpts().CPlusPlus11)
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 0505d53d6f99d..ab82b5d338c6b 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1651,6 +1651,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   case attr::ArmMveStrictPolymorphism:
     OS << "__clang_arm_mve_strict_polymorphism";
     break;
+  case attr::ArmSveVectorBits:
+    OS << "arm_sve_vector_bits";
+    break;
   }
   OS << "))";
 }
diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp
index 989ee0fa75cdd..33fb7a92955bb 100644
--- a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp
@@ -120,7 +120,8 @@ static constexpr std::pair
         {"BasicRegex", llvm::Regex::RegexFlags::BasicRegex},
 };
 
-llvm::Optional getRegexFlag(llvm::StringRef Flag) {
+static llvm::Optional
+getRegexFlag(llvm::StringRef Flag) {
   for (const auto &StringFlag : RegexMap) {
     if (Flag == StringFlag.first)
       return StringFlag.second;
@@ -128,7 +129,8 @@ llvm::Optional getRegexFlag(llvm::StringRef Flag) {
   return llvm::None;
 }
 
-llvm::Optional getCloseRegexMatch(llvm::StringRef Flag) {
+static llvm::Optional
+getCloseRegexMatch(llvm::StringRef Flag) {
   for (const auto &StringFlag : RegexMap) {
     if (Flag.edit_distance(StringFlag.first) < 3)
       return StringFlag.first;
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index a0a65092a92b4..ec2215804c098 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -389,6 +389,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isExpr);
   REGISTER_MATCHER(isExternC);
   REGISTER_MATCHER(isFinal);
+  REGISTER_MATCHER(isFirstPrivateKind);
   REGISTER_MATCHER(isImplicit);
   REGISTER_MATCHER(isInStdNamespace);
   REGISTER_MATCHER(isInTemplateInstantiation);
diff --git a/clang/lib/Analysis/PathDiagnostic.cpp b/clang/lib/Analysis/PathDiagnostic.cpp
index c88e6c1e1535f..9aa3386129d7f 100644
--- a/clang/lib/Analysis/PathDiagnostic.cpp
+++ b/clang/lib/Analysis/PathDiagnostic.cpp
@@ -327,6 +327,10 @@ static Optional comparePath(const PathPieces &X, const PathPieces &Y) {
 }
 
 static bool compareCrossTUSourceLocs(FullSourceLoc XL, FullSourceLoc YL) {
+  if (XL.isInvalid() && YL.isValid())
+    return true;
+  if (XL.isValid() && YL.isInvalid())
+    return false;
   std::pair XOffs = XL.getDecomposedLoc();
   std::pair YOffs = YL.getDecomposedLoc();
   const SourceManager &SM = XL.getManager();
@@ -349,6 +353,10 @@ static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) {
   FullSourceLoc YL = Y.getLocation().asLocation();
   if (XL != YL)
     return compareCrossTUSourceLocs(XL, YL);
+  FullSourceLoc XUL = X.getUniqueingLoc().asLocation();
+  FullSourceLoc YUL = Y.getUniqueingLoc().asLocation();
+  if (XUL != YUL)
+    return compareCrossTUSourceLocs(XUL, YUL);
   if (X.getBugType() != Y.getBugType())
     return X.getBugType() < Y.getBugType();
   if (X.getCategory() != Y.getCategory())
@@ -357,20 +365,27 @@ static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) {
     return X.getVerboseDescription() < Y.getVerboseDescription();
   if (X.getShortDescription() != Y.getShortDescription())
     return X.getShortDescription() < Y.getShortDescription();
-  if (X.getDeclWithIssue() != Y.getDeclWithIssue()) {
-    const Decl *XD = X.getDeclWithIssue();
-    if (!XD)
+  auto CompareDecls = [&XL](const Decl *D1, const Decl *D2) -> Optional {
+    if (D1 == D2)
+      return None;
+    if (!D1)
       return true;
-    const Decl *YD = Y.getDeclWithIssue();
-    if (!YD)
+    if (!D2)
       return false;
-    SourceLocation XDL = XD->getLocation();
-    SourceLocation YDL = YD->getLocation();
-    if (XDL != YDL) {
+    SourceLocation D1L = D1->getLocation();
+    SourceLocation D2L = D2->getLocation();
+    if (D1L != D2L) {
       const SourceManager &SM = XL.getManager();
-      return compareCrossTUSourceLocs(FullSourceLoc(XDL, SM),
-                                      FullSourceLoc(YDL, SM));
+      return compareCrossTUSourceLocs(FullSourceLoc(D1L, SM),
+                                      FullSourceLoc(D2L, SM));
     }
+    return None;
+  };
+  if (auto Result = CompareDecls(X.getDeclWithIssue(), Y.getDeclWithIssue()))
+    return *Result;
+  if (XUL.isValid()) {
+    if (auto Result = CompareDecls(X.getUniqueingDecl(), Y.getUniqueingDecl()))
+      return *Result;
   }
   PathDiagnostic::meta_iterator XI = X.meta_begin(), XE = X.meta_end();
   PathDiagnostic::meta_iterator YI = Y.meta_begin(), YE = Y.meta_end();
@@ -1118,6 +1133,8 @@ void PathDiagnosticPopUpPiece::Profile(llvm::FoldingSetNodeID &ID) const {
 
 void PathDiagnostic::Profile(llvm::FoldingSetNodeID &ID) const {
   ID.Add(getLocation());
+  ID.Add(getUniqueingLoc());
+  ID.AddPointer(getUniqueingLoc().isValid() ? getUniqueingDecl() : nullptr);
   ID.AddString(BugType);
   ID.AddString(VerboseDesc);
   ID.AddString(Category);
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 25c02cb888c1b..6fd97d4e57869 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -376,6 +376,10 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+
+  if (Opts.ArmSveVectorBits)
+    Builder.defineMacro("__ARM_FEATURE_SVE_BITS_EXPERIMENTAL",
+                        Twine(Opts.ArmSveVectorBits));
 }
 
 ArrayRef AArch64TargetInfo::getTargetBuiltins() const {
diff --git a/clang/lib/Basic/Targets/MSP430.cpp b/clang/lib/Basic/Targets/MSP430.cpp
index ef53ee352c329..90890500ae274 100644
--- a/clang/lib/Basic/Targets/MSP430.cpp
+++ b/clang/lib/Basic/Targets/MSP430.cpp
@@ -29,5 +29,6 @@ void MSP430TargetInfo::getTargetDefines(const LangOptions &Opts,
                                         MacroBuilder &Builder) const {
   Builder.defineMacro("MSP430");
   Builder.defineMacro("__MSP430__");
+  Builder.defineMacro("__ELF__");
   // FIXME: defines for different 'flavours' of MCU
 }
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index af04b75392f59..cfa362bef1b1c 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -821,7 +821,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo
     : public OSTargetInfo {
 protected:
   void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
-                    MacroBuilder &Builder) const {
+                    MacroBuilder &Builder) const override {
     // A common platform macro.
     if (Opts.POSIXThreads)
       Builder.defineMacro("_REENTRANT");
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 522776437cd21..4ba703c8dd1aa 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -13,6 +13,7 @@
 #include "RISCV.h"
 #include "clang/Basic/MacroBuilder.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/TargetParser.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -166,3 +167,23 @@ bool RISCVTargetInfo::handleTargetFeatures(std::vector &Features,
 
   return true;
 }
+
+bool RISCV32TargetInfo::isValidCPUName(StringRef Name) const {
+  return llvm::RISCV::checkCPUKind(llvm::RISCV::parseCPUKind(Name),
+                                   /*Is64Bit=*/false);
+}
+
+void RISCV32TargetInfo::fillValidCPUList(
+    SmallVectorImpl &Values) const {
+  llvm::RISCV::fillValidCPUArchList(Values, false);
+}
+
+bool RISCV64TargetInfo::isValidCPUName(StringRef Name) const {
+  return llvm::RISCV::checkCPUKind(llvm::RISCV::parseCPUKind(Name),
+                                   /*Is64Bit=*/true);
+}
+
+void RISCV64TargetInfo::fillValidCPUList(
+    SmallVectorImpl &Values) const {
+  llvm::RISCV::fillValidCPUArchList(Values, true);
+}
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index 73652b409e9ce..6db526da4c59f 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -24,7 +24,7 @@ namespace targets {
 // RISC-V Target
 class RISCVTargetInfo : public TargetInfo {
 protected:
-  std::string ABI;
+  std::string ABI, CPU;
   bool HasM;
   bool HasA;
   bool HasF;
@@ -44,6 +44,13 @@ class RISCVTargetInfo : public TargetInfo {
     WIntType = UnsignedInt;
   }
 
+  bool setCPU(const std::string &Name) override {
+    if (!isValidCPUName(Name))
+      return false;
+    CPU = Name;
+    return true;
+  }
+
   StringRef getABI() const override { return ABI; }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
@@ -97,6 +104,9 @@ class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
     return false;
   }
 
+  bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl &Values) const override;
+
   void setMaxAtomicWidth() override {
     MaxAtomicPromoteWidth = 128;
 
@@ -121,6 +131,9 @@ class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
     return false;
   }
 
+  bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl &Values) const override;
+
   void setMaxAtomicWidth() override {
     MaxAtomicPromoteWidth = 128;
 
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index e280a72166451..543f232d24591 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1062,9 +1062,9 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
   return llvm::StringSwitch(FeatureStr)
 #define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
-#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
-#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
-#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true)
+#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
+#define X86_CPU_TYPE(ENUM, STR) .Case(STR, true)
+#define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true)
 #include "llvm/Support/X86TargetParser.def"
       .Default(false);
 }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e54e3e405b25a..5bf2cc0beea85 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6367,6 +6367,12 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
                             llvm::ConstantInt::get(Int32Ty, Value));
 }
 
+enum SpecialRegisterAccessKind {
+  NormalRead,
+  VolatileRead,
+  Write,
+};
+
 // Generates the IR for the read/write special register builtin,
 // ValueType is the type of the value that is to be written or read,
 // RegisterType is the type of the register being written to or read from.
@@ -6374,7 +6380,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                          const CallExpr *E,
                                          llvm::Type *RegisterType,
                                          llvm::Type *ValueType,
-                                         bool IsRead,
+                                         SpecialRegisterAccessKind AccessKind,
                                          StringRef SysReg = "") {
   // write and register intrinsics only support 32 and 64 bit operations.
   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
@@ -6399,8 +6405,12 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
             && "Can't fit 64-bit value in 32-bit register");
 
-  if (IsRead) {
-    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+  if (AccessKind != Write) {
+    assert(AccessKind == NormalRead || AccessKind == VolatileRead);
+    llvm::Function *F = CGM.getIntrinsic(
+        AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
+                                   : llvm::Intrinsic::read_register,
+        Types);
     llvm::Value *Call = Builder.CreateCall(F, Metadata);
 
     if (MixedTypes)
@@ -6779,9 +6789,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
       BuiltinID == ARM::BI__builtin_arm_wsrp) {
 
-    bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
-                  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
-                  BuiltinID == ARM::BI__builtin_arm_rsrp;
+    SpecialRegisterAccessKind AccessKind = Write;
+    if (BuiltinID == ARM::BI__builtin_arm_rsr ||
+        BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+        BuiltinID == ARM::BI__builtin_arm_rsrp)
+      AccessKind = VolatileRead;
 
     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
                             BuiltinID == ARM::BI__builtin_arm_wsrp;
@@ -6800,7 +6812,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
       ValueType = RegisterType = Int32Ty;
     }
 
-    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
+                                      AccessKind);
   }
 
   // Deal with MVE builtins
@@ -8840,9 +8853,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
 
-    bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
-                  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
-                  BuiltinID == AArch64::BI__builtin_arm_rsrp;
+    SpecialRegisterAccessKind AccessKind = Write;
+    if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
+        BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+        BuiltinID == AArch64::BI__builtin_arm_rsrp)
+      AccessKind = VolatileRead;
 
     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
@@ -8860,7 +8875,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       ValueType = Int32Ty;
     }
 
-    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
+                                      AccessKind);
   }
 
   if (BuiltinID == AArch64::BI_ReadStatusReg ||
@@ -11661,11 +11677,11 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
   std::tie(Index, Value) = StringSwitch>(CPUStr)
 #define X86_VENDOR(ENUM, STRING)                                               \
   .Case(STRING, {0u, static_cast(llvm::X86::ENUM)})
-#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, ALIAS)             \
+#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
   .Case(ALIAS, {1u, static_cast(llvm::X86::ENUM)})
-#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
+#define X86_CPU_TYPE(ENUM, STR)                                                \
   .Case(STR, {1u, static_cast(llvm::X86::ENUM)})
-#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
+#define X86_CPU_SUBTYPE(ENUM, STR)                                             \
   .Case(STR, {2u, static_cast(llvm::X86::ENUM)})
 #include "llvm/Support/X86TargetParser.def"
                                .Default({0, 0});
@@ -14803,7 +14819,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec: {
     CallInst *CI = cast(
-      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
+      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
     CI->setConvergent();
     return CI;
   }
@@ -14812,7 +14828,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
       "exec_lo" : "exec_hi";
     CallInst *CI = cast(
-      EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
+      EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
     CI->setConvergent();
     return CI;
   }
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2571dad3efe53..f1355c1ec21f9 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -4875,6 +4875,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   } else {
     // Otherwise, nounwind call sites will never throw.
     CannotThrow = Attrs.hasFnAttribute(llvm::Attribute::NoUnwind);
+
+    if (auto *FPtr = dyn_cast(CalleePtr))
+      if (FPtr->hasFnAttribute(llvm::Attribute::NoUnwind))
+        CannotThrow = true;
   }
 
   // If we made a temporary, be sure to clean up after ourselves. Note that we
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 9240027c5e1cb..c6e6c94df2e30 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -250,7 +250,8 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD,
   CodeGenFunction CGF(CGM);
 
   CGF.StartFunction(GlobalDecl(&VD, DynamicInitKind::AtExit),
-                    CGM.getContext().VoidTy, fn, FI, FunctionArgList());
+                    CGM.getContext().VoidTy, fn, FI, FunctionArgList(),
+                    VD.getLocation(), VD.getInit()->getExprLoc());
 
   llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr);
 
diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp
index 213a6db02bc99..dc2b2fc14e8db 100644
--- a/clang/lib/CodeGen/CGException.cpp
+++ b/clang/lib/CodeGen/CGException.cpp
@@ -1815,6 +1815,48 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
     llvm::Constant *ParentI8Fn =
         llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
     ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
+
+    // If the parent is a _finally, the passed-in ParentFP is the FP
+    // of parent _finally, not Establisher's FP (FP of outermost function).
+    // Establisher FP is the 2nd parameter passed into parent _finally.
+    // Fortunately, it's always saved in parent's frame. The following
+    // code retrieves it, and escapes it so that spill instruction won't be
+    // optimized away.
+    if (ParentCGF.ParentCGF != nullptr) {
+      // Locate and escape Parent's frame_pointer.addr alloca
+      // Depending on target, should be 1st/2nd one in LocalDeclMap.
+      // Let's just scan for ImplicitParamDecl with VoidPtrTy.
+      llvm::AllocaInst *FramePtrAddrAlloca = nullptr;
+      for (auto &I : ParentCGF.LocalDeclMap) {
+        const VarDecl *D = cast(I.first);
+        if (isa(D) &&
+            D->getType() == getContext().VoidPtrTy) {
+          assert(D->getName().startswith("frame_pointer"));
+          FramePtrAddrAlloca = cast(I.second.getPointer());
+          break;
+        }
+      }
+      assert(FramePtrAddrAlloca);
+      auto InsertPair = ParentCGF.EscapedLocals.insert(
+          std::make_pair(FramePtrAddrAlloca, ParentCGF.EscapedLocals.size()));
+      int FrameEscapeIdx = InsertPair.first->second;
+
+      // An example of a filter's prolog:
+      // %0 = call i8* @llvm.eh.recoverfp(bitcast(@"?fin$0@0@main@@"),..)
+      // %1 = call i8* @llvm.localrecover(bitcast(@"?fin$0@0@main@@"),..)
+      // %2 = bitcast i8* %1 to i8**
+      // %3 = load i8*, i8* *%2, align 8
+      //   ==> %3 is the frame-pointer of outermost host function
+      llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration(
+          &CGM.getModule(), llvm::Intrinsic::localrecover);
+      llvm::Constant *ParentI8Fn =
+          llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
+      ParentFP = Builder.CreateCall(
+          FrameRecoverFn, {ParentI8Fn, ParentFP,
+                           llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)});
+      ParentFP = Builder.CreateBitCast(ParentFP, CGM.VoidPtrPtrTy);
+      ParentFP = Builder.CreateLoad(Address(ParentFP, getPointerAlign()));
+    }
   }
 
   // Create llvm.localrecover calls for all captures.
@@ -2013,6 +2055,7 @@ void CodeGenFunction::pushSEHCleanup(CleanupKind Kind,
 
 void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) {
   CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
+  HelperCGF.ParentCGF = this;
   if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
     // Outline the finally block.
     llvm::Function *FinallyFunc =
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 158a548e66c1e..cd2b84f5dd203 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1836,6 +1836,40 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
   llvm::Value *CurrentItem =
     Builder.CreateAlignedLoad(CurrentItemPtr, getPointerAlign());
 
+  if (SanOpts.has(SanitizerKind::ObjCCast)) {
+    // Before using an item from the collection, check that the implicit cast
+    // from id to the element type is valid. This is done with instrumentation
+    // roughly corresponding to:
+    //
+    //   if (![item isKindOfClass:expectedCls]) { /* emit diagnostic */ }
+    const ObjCObjectPointerType *ObjPtrTy =
+        elementType->getAsObjCInterfacePointerType();
+    const ObjCInterfaceType *InterfaceTy =
+        ObjPtrTy ? ObjPtrTy->getInterfaceType() : nullptr;
+    if (InterfaceTy) {
+      SanitizerScope SanScope(this);
+      auto &C = CGM.getContext();
+      assert(InterfaceTy->getDecl() && "No decl for ObjC interface type");
+      Selector IsKindOfClassSel = GetUnarySelector("isKindOfClass", C);
+      CallArgList IsKindOfClassArgs;
+      llvm::Value *Cls =
+          CGM.getObjCRuntime().GetClass(*this, InterfaceTy->getDecl());
+      IsKindOfClassArgs.add(RValue::get(Cls), C.getObjCClassType());
+      llvm::Value *IsClass =
+          CGM.getObjCRuntime()
+              .GenerateMessageSend(*this, ReturnValueSlot(), C.BoolTy,
+                                   IsKindOfClassSel, CurrentItem,
+                                   IsKindOfClassArgs)
+              .getScalarVal();
+      llvm::Constant *StaticData[] = {
+          EmitCheckSourceLocation(S.getBeginLoc()),
+          EmitCheckTypeDescriptor(QualType(InterfaceTy, 0))};
+      EmitCheck({{IsClass, SanitizerKind::ObjCCast}},
+                SanitizerHandler::InvalidObjCCast,
+                ArrayRef(StaticData), CurrentItem);
+    }
+  }
+
   // Cast that value to the right type.
   CurrentItem = Builder.CreateBitCast(CurrentItem, convertedElementType,
                                       "currentitem");
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 43cbe9c720ea3..f6d36bd84385f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -886,8 +886,11 @@ void ReductionCodeGen::emitInitialization(
       SharedType, SharedAddresses[N].first.getBaseInfo(),
       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
+    if (DRD && DRD->getInitializer())
+      (void)DefaultInit(CGF);
     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
+    (void)DefaultInit(CGF);
     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                      PrivateAddr, SharedLVal.getAddress(CGF),
                                      SharedLVal.getType());
@@ -7075,6 +7078,28 @@ class MappableExprsHandler {
   using MapBaseValuesArrayTy = SmallVector;
   using MapValuesArrayTy = SmallVector;
   using MapFlagsArrayTy = SmallVector;
+  using MapMappersArrayTy = SmallVector;
+
+  /// This structure contains combined information generated for mappable
+  /// clauses, including base pointers, pointers, sizes, map types, and
+  /// user-defined mappers.
+  struct MapCombinedInfoTy {
+    MapBaseValuesArrayTy BasePointers;
+    MapValuesArrayTy Pointers;
+    MapValuesArrayTy Sizes;
+    MapFlagsArrayTy Types;
+    MapMappersArrayTy Mappers;
+
+    /// Append arrays in \a CurInfo.
+    void append(MapCombinedInfoTy &CurInfo) {
+      BasePointers.append(CurInfo.BasePointers.begin(),
+                          CurInfo.BasePointers.end());
+      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
+      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
+      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
+      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
+    }
+  };
 
   /// Map between a struct and the its lowest & highest elements which have been
   /// mapped.
@@ -7096,6 +7121,7 @@ class MappableExprsHandler {
     ArrayRef MapModifiers;
     bool ReturnDevicePointer = false;
     bool IsImplicit = false;
+    const ValueDecl *Mapper = nullptr;
     bool ForDeviceAddr = false;
 
     MapInfo() = default;
@@ -7103,10 +7129,11 @@ class MappableExprsHandler {
         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
         OpenMPMapClauseKind MapType,
         ArrayRef MapModifiers, bool ReturnDevicePointer,
-        bool IsImplicit, bool ForDeviceAddr = false)
+        bool IsImplicit, const ValueDecl *Mapper = nullptr,
+        bool ForDeviceAddr = false)
         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
-          ForDeviceAddr(ForDeviceAddr) {}
+          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
   };
 
   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
@@ -7302,17 +7329,17 @@ class MappableExprsHandler {
     return ConstLength.getSExtValue() != 1;
   }
 
-  /// Generate the base pointers, section pointers, sizes and map type
-  /// bits for the provided map type, map modifier, and expression components.
-  /// \a IsFirstComponent should be set to true if the provided set of
-  /// components is the first associated with a capture.
+  /// Generate the base pointers, section pointers, sizes, map type bits, and
+  /// user-defined mappers (all included in \a CombinedInfo) for the provided
+  /// map type, map modifier, and expression components. \a IsFirstComponent
+  /// should be set to true if the provided set of components is the first
+  /// associated with a capture.
   void generateInfoForComponentList(
       OpenMPMapClauseKind MapType, ArrayRef MapModifiers,
       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
-      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
-      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
-      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
-      bool IsImplicit, bool ForDeviceAddr = false,
+      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
+      bool IsFirstComponentList, bool IsImplicit,
+      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
       ArrayRef
           OverlappedElements = llvm::None) const {
     // The following summarizes what has to be generated for each map and the
@@ -7672,31 +7699,37 @@ class MappableExprsHandler {
                 break;
               }
             }
-            BasePointers.push_back(BP.getPointer());
-            Pointers.push_back(LB.getPointer());
-            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
-                                                      /*isSigned=*/true));
-            Types.push_back(Flags);
+            CombinedInfo.BasePointers.push_back(BP.getPointer());
+            CombinedInfo.Pointers.push_back(LB.getPointer());
+            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+                Size, CGF.Int64Ty, /*isSigned=*/true));
+            CombinedInfo.Types.push_back(Flags);
+            CombinedInfo.Mappers.push_back(nullptr);
             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
           }
-          BasePointers.push_back(BP.getPointer());
-          Pointers.push_back(LB.getPointer());
+          CombinedInfo.BasePointers.push_back(BP.getPointer());
+          CombinedInfo.Pointers.push_back(LB.getPointer());
           Size = CGF.Builder.CreatePtrDiff(
               CGF.EmitCastToVoidPtr(
                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
               CGF.EmitCastToVoidPtr(LB.getPointer()));
-          Sizes.push_back(
+          CombinedInfo.Sizes.push_back(
               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
-          Types.push_back(Flags);
+          CombinedInfo.Types.push_back(Flags);
+          CombinedInfo.Mappers.push_back(nullptr);
           break;
         }
         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
         if (!IsMemberPointerOrAddr) {
-          BasePointers.push_back(BP.getPointer());
-          Pointers.push_back(LB.getPointer());
-          Sizes.push_back(
+          CombinedInfo.BasePointers.push_back(BP.getPointer());
+          CombinedInfo.Pointers.push_back(LB.getPointer());
+          CombinedInfo.Sizes.push_back(
               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
 
+          // If Mapper is valid, the last component inherits the mapper.
+          bool HasMapper = Mapper && Next == CE;
+          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
+
           // We need to add a pointer flag for each map that comes from the
           // same expression except for the first one. We also need to signal
           // this map is the first one that relates with the current capture
@@ -7723,7 +7756,7 @@ class MappableExprsHandler {
             }
           }
 
-          Types.push_back(Flags);
+          CombinedInfo.Types.push_back(Flags);
         }
 
         // If we have encountered a member expression so far, keep track of the
@@ -7896,7 +7929,7 @@ class MappableExprsHandler {
     // Extract device pointer clause information.
     for (const auto *C : Dir.getClausesOfKind())
       for (auto L : C->component_lists())
-        DevPointersMap[L.first].push_back(L.second);
+        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
   }
 
   /// Constructor for the declare mapper directive.
@@ -7906,15 +7939,16 @@ class MappableExprsHandler {
   /// Generate code for the combined entry if we have a partially mapped struct
   /// and take care of the mapping flags of the arguments corresponding to
   /// individual struct members.
-  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
-                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
-                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
+  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
+                         MapFlagsArrayTy &CurTypes,
                          const StructRangeInfoTy &PartialStruct) const {
     // Base is the base of the struct
-    BasePointers.push_back(PartialStruct.Base.getPointer());
+    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
     // Pointer is the address of the lowest element
     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
-    Pointers.push_back(LB);
+    CombinedInfo.Pointers.push_back(LB);
+    // There should not be a mapper for a combined entry.
+    CombinedInfo.Mappers.push_back(nullptr);
     // Size is (addr of {highest+1} element) - (addr of lowest element)
     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
@@ -7923,9 +7957,9 @@ class MappableExprsHandler {
     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                   /*isSigned=*/false);
-    Sizes.push_back(Size);
+    CombinedInfo.Sizes.push_back(Size);
     // Map type is always TARGET_PARAM
-    Types.push_back(OMP_MAP_TARGET_PARAM);
+    CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
     // Remove TARGET_PARAM flag from the first element
     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
 
@@ -7933,18 +7967,20 @@ class MappableExprsHandler {
     // (except for PTR_AND_OBJ entries which do not have a placeholder value
     // 0xFFFF in the MEMBER_OF field).
     OpenMPOffloadMappingFlags MemberOfFlag =
-        getMemberOfFlag(BasePointers.size() - 1);
+        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
     for (auto &M : CurTypes)
       setCorrectMemberOfFlag(M, MemberOfFlag);
   }
 
-  /// Generate all the base pointers, section pointers, sizes and map
-  /// types for the extracted mappable expressions. Also, for each item that
-  /// relates with a device pointer, a pair of the relevant declaration and
-  /// index where it occurs is appended to the device pointers info array.
-  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
-                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
-                       MapFlagsArrayTy &Types) const {
+  /// Generate all the base pointers, section pointers, sizes, map types, and
+  /// mappers for the extracted mappable expressions (all included in \a
+  /// CombinedInfo). Also, for each item that relates with a device pointer, a
+  /// pair of the relevant declaration and index where it occurs is appended to
+  /// the device pointers info array.
+  void generateAllInfo(
+      MapCombinedInfoTy &CombinedInfo,
+      const llvm::DenseSet> &SkipVarSet =
+          llvm::DenseSet>()) const {
     // We have to process the component lists that relate with the same
     // declaration in a single chunk so that we can generate the map flags
     // correctly. Therefore, we organize all lists in a map.
@@ -7953,16 +7989,19 @@ class MappableExprsHandler {
     // Helper function to fill the information map for the different supported
     // clauses.
     auto &&InfoGen =
-        [&Info](const ValueDecl *D,
-                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
-                OpenMPMapClauseKind MapType,
-                ArrayRef MapModifiers,
-                bool ReturnDevicePointer, bool IsImplicit,
-                bool ForDeviceAddr = false) {
+        [&Info, &SkipVarSet](
+            const ValueDecl *D,
+            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+            OpenMPMapClauseKind MapType,
+            ArrayRef MapModifiers,
+            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
+            bool ForDeviceAddr = false) {
           const ValueDecl *VD =
               D ? cast(D->getCanonicalDecl()) : nullptr;
+          if (SkipVarSet.count(VD))
+            return;
           Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
-                                IsImplicit, ForDeviceAddr);
+                                IsImplicit, Mapper, ForDeviceAddr);
         };
 
     assert(CurDir.is() &&
@@ -7970,18 +8009,19 @@ class MappableExprsHandler {
     const auto *CurExecDir = CurDir.get();
     for (const auto *C : CurExecDir->getClausesOfKind())
       for (const auto L : C->component_lists()) {
-        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
-            /*ReturnDevicePointer=*/false, C->isImplicit());
+        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
+                C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false,
+                C->isImplicit(), std::get<2>(L));
       }
     for (const auto *C : CurExecDir->getClausesOfKind())
       for (const auto L : C->component_lists()) {
-        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
-            /*ReturnDevicePointer=*/false, C->isImplicit());
+        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
+                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
       }
     for (const auto *C : CurExecDir->getClausesOfKind())
       for (const auto L : C->component_lists()) {
-        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
-            /*ReturnDevicePointer=*/false, C->isImplicit());
+        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
+                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
       }
 
     // Look at the use_device_ptr clause information and mark the existing map
@@ -7996,10 +8036,13 @@ class MappableExprsHandler {
     for (const auto *C :
          CurExecDir->getClausesOfKind()) {
       for (const auto L : C->component_lists()) {
-        assert(!L.second.empty() && "Not expecting empty list of components!");
-        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
+            std::get<1>(L);
+        assert(!Components.empty() &&
+               "Not expecting empty list of components!");
+        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
         VD = cast(VD->getCanonicalDecl());
-        const Expr *IE = L.second.back().getAssociatedExpression();
+        const Expr *IE = Components.back().getAssociatedExpression();
         // If the first component is a member expression, we have to look into
         // 'this', which maps to null in the map of map information. Otherwise
         // look directly for the information.
@@ -8031,16 +8074,19 @@ class MappableExprsHandler {
           // Nonetheless, generateInfoForComponentList must be called to take
           // the pointer into account for the calculation of the range of the
           // partial struct.
-          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
-                  /*ReturnDevicePointer=*/false, C->isImplicit());
+          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None,
+                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
         } else {
           llvm::Value *Ptr =
               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
-          BasePointers.emplace_back(Ptr, VD);
-          Pointers.push_back(Ptr);
-          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
-          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
+          CombinedInfo.Pointers.push_back(Ptr);
+          CombinedInfo.Sizes.push_back(
+              llvm::Constant::getNullValue(CGF.Int64Ty));
+          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
+                                       OMP_MAP_TARGET_PARAM);
+          CombinedInfo.Mappers.push_back(nullptr);
         }
       }
     }
@@ -8055,12 +8101,13 @@ class MappableExprsHandler {
     for (const auto *C :
          CurExecDir->getClausesOfKind()) {
       for (const auto L : C->component_lists()) {
-        assert(!L.second.empty() && "Not expecting empty list of components!");
-        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+        assert(!std::get<1>(L).empty() &&
+               "Not expecting empty list of components!");
+        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
         if (!Processed.insert(VD).second)
           continue;
         VD = cast(VD->getCanonicalDecl());
-        const Expr *IE = L.second.back().getAssociatedExpression();
+        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
         // If the first component is a member expression, we have to look into
         // 'this', which maps to null in the map of map information. Otherwise
         // look directly for the information.
@@ -8091,8 +8138,8 @@ class MappableExprsHandler {
           // Nonetheless, generateInfoForComponentList must be called to take
           // the pointer into account for the calculation of the range of the
           // partial struct.
-          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
-                  /*ReturnDevicePointer=*/false, C->isImplicit(),
+          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
+                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr,
                   /*ForDeviceAddr=*/true);
           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
         } else {
@@ -8101,10 +8148,11 @@ class MappableExprsHandler {
             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
           else
             Ptr = CGF.EmitScalarExpr(IE);
-          BasePointers.emplace_back(Ptr, VD);
-          Pointers.push_back(Ptr);
-          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
-          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
+          CombinedInfo.Pointers.push_back(Ptr);
+          CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
+          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+          CombinedInfo.Mappers.push_back(nullptr);
         }
       }
     }
@@ -8114,11 +8162,8 @@ class MappableExprsHandler {
       // associated with a capture, because the mapping flags depend on it.
       bool IsFirstComponentList = true;
 
-      // Temporary versions of arrays
-      MapBaseValuesArrayTy CurBasePointers;
-      MapValuesArrayTy CurPointers;
-      MapValuesArrayTy CurSizes;
-      MapFlagsArrayTy CurTypes;
+      // Temporary generated information.
+      MapCombinedInfoTy CurInfo;
       StructRangeInfoTy PartialStruct;
 
       for (const MapInfo &L : M.second) {
@@ -8126,16 +8171,15 @@ class MappableExprsHandler {
                "Not expecting declaration with no component lists.");
 
         // Remember the current base pointer index.
-        unsigned CurrentBasePointersIdx = CurBasePointers.size();
+        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
         generateInfoForComponentList(
-            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
-            CurPointers, CurSizes, CurTypes, PartialStruct,
-            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
+            L.MapType, L.MapModifiers, L.Components, CurInfo, PartialStruct,
+            IsFirstComponentList, L.IsImplicit, L.Mapper, L.ForDeviceAddr);
 
         // If this entry relates with a device pointer, set the relevant
         // declaration and add the 'return pointer' flag.
         if (L.ReturnDevicePointer) {
-          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
+          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                  "Unexpected number of mapped base pointers.");
 
           const ValueDecl *RelevantVD =
@@ -8143,8 +8187,9 @@ class MappableExprsHandler {
           assert(RelevantVD &&
                  "No relevant declaration related with device pointer??");
 
-          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
-          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
+          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
+              RelevantVD);
+          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
         }
         IsFirstComponentList = false;
       }
@@ -8165,7 +8210,7 @@ class MappableExprsHandler {
             // Entry is RETURN_PARAM. Also, set the placeholder value
             // MEMBER_OF=FFFF so that the entry is later updated with the
             // correct value of MEMBER_OF.
-            CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
+            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
           } else {
             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
@@ -8173,35 +8218,31 @@ class MappableExprsHandler {
             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
             // value MEMBER_OF=FFFF so that the entry is later updated with the
             // correct value of MEMBER_OF.
-            CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
-                               OMP_MAP_MEMBER_OF);
+            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
+                                    OMP_MAP_MEMBER_OF);
           }
-          CurBasePointers.emplace_back(BasePtr, L.VD);
-          CurPointers.push_back(Ptr);
-          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
+          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
+          CurInfo.Pointers.push_back(Ptr);
+          CurInfo.Sizes.push_back(
+              llvm::Constant::getNullValue(this->CGF.Int64Ty));
+          CurInfo.Mappers.push_back(nullptr);
         }
       }
 
       // If there is an entry in PartialStruct it means we have a struct with
       // individual members mapped. Emit an extra combined entry.
       if (PartialStruct.Base.isValid())
-        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
-                          PartialStruct);
+        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
 
       // We need to append the results of this capture to what we already have.
-      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
-      Pointers.append(CurPointers.begin(), CurPointers.end());
-      Sizes.append(CurSizes.begin(), CurSizes.end());
-      Types.append(CurTypes.begin(), CurTypes.end());
+      CombinedInfo.append(CurInfo);
     }
   }
 
-  /// Generate all the base pointers, section pointers, sizes and map types for
-  /// the extracted map clauses of user-defined mapper.
-  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
-                                MapValuesArrayTy &Pointers,
-                                MapValuesArrayTy &Sizes,
-                                MapFlagsArrayTy &Types) const {
+  /// Generate all the base pointers, section pointers, sizes, map types, and
+  /// mappers for the extracted map clauses of user-defined mapper (all included
+  /// in \a CombinedInfo).
+  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
     assert(CurDir.is() &&
            "Expect a declare mapper directive");
     const auto *CurMapperDir = CurDir.get();
@@ -8210,25 +8251,17 @@ class MappableExprsHandler {
     // correctly. Therefore, we organize all lists in a map.
     llvm::MapVector> Info;
 
-    // Helper function to fill the information map for the different supported
-    // clauses.
-    auto &&InfoGen = [&Info](
-        const ValueDecl *D,
-        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
-        OpenMPMapClauseKind MapType,
-        ArrayRef MapModifiers,
-        bool ReturnDevicePointer, bool IsImplicit) {
-      const ValueDecl *VD =
-          D ? cast(D->getCanonicalDecl()) : nullptr;
-      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
-                            IsImplicit);
-    };
-
+    // Fill the information map for map clauses.
     for (const auto *C : CurMapperDir->clauselists()) {
       const auto *MC = cast(C);
       for (const auto L : MC->component_lists()) {
-        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
-                /*ReturnDevicePointer=*/false, MC->isImplicit());
+        const ValueDecl *VD =
+            std::get<0>(L) ? cast(std::get<0>(L)->getCanonicalDecl())
+                           : nullptr;
+        // Get the corresponding user-defined mapper.
+        Info[VD].emplace_back(
+            std::get<1>(L), MC->getMapType(), MC->getMapTypeModifiers(),
+            /*ReturnDevicePointer=*/false, MC->isImplicit(), std::get<2>(L));
       }
     }
 
@@ -8237,42 +8270,32 @@ class MappableExprsHandler {
       // associated with a capture, because the mapping flags depend on it.
       bool IsFirstComponentList = true;
 
-      // Temporary versions of arrays
-      MapBaseValuesArrayTy CurBasePointers;
-      MapValuesArrayTy CurPointers;
-      MapValuesArrayTy CurSizes;
-      MapFlagsArrayTy CurTypes;
+      // Temporary generated information.
+      MapCombinedInfoTy CurInfo;
       StructRangeInfoTy PartialStruct;
 
       for (const MapInfo &L : M.second) {
         assert(!L.Components.empty() &&
                "Not expecting declaration with no component lists.");
         generateInfoForComponentList(
-            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
-            CurPointers, CurSizes, CurTypes, PartialStruct,
-            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
+            L.MapType, L.MapModifiers, L.Components, CurInfo, PartialStruct,
+            IsFirstComponentList, L.IsImplicit, L.Mapper, L.ForDeviceAddr);
         IsFirstComponentList = false;
       }
 
       // If there is an entry in PartialStruct it means we have a struct with
       // individual members mapped. Emit an extra combined entry.
       if (PartialStruct.Base.isValid())
-        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
-                          PartialStruct);
+        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
 
       // We need to append the results of this capture to what we already have.
-      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
-      Pointers.append(CurPointers.begin(), CurPointers.end());
-      Sizes.append(CurSizes.begin(), CurSizes.end());
-      Types.append(CurTypes.begin(), CurTypes.end());
+      CombinedInfo.append(CurInfo);
     }
   }
 
   /// Emit capture info for lambdas for variables captured by reference.
   void generateInfoForLambdaCaptures(
-      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
-      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
-      MapFlagsArrayTy &Types,
+      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
       llvm::DenseMap &LambdaPointers) const {
     const auto *RD = VD->getType()
                          .getCanonicalType()
@@ -8292,13 +8315,14 @@ class MappableExprsHandler {
       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                  VDLVal.getPointer(CGF));
-      BasePointers.push_back(ThisLVal.getPointer(CGF));
-      Pointers.push_back(ThisLValVal.getPointer(CGF));
-      Sizes.push_back(
+      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
+      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
+      CombinedInfo.Sizes.push_back(
           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                     CGF.Int64Ty, /*isSigned=*/true));
-      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
-                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+      CombinedInfo.Mappers.push_back(nullptr);
     }
     for (const LambdaCapture &LC : RD->captures()) {
       if (!LC.capturesVariable())
@@ -8313,9 +8337,9 @@ class MappableExprsHandler {
         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                    VDLVal.getPointer(CGF));
-        BasePointers.push_back(VarLVal.getPointer(CGF));
-        Pointers.push_back(VarLValVal.getPointer(CGF));
-        Sizes.push_back(CGF.Builder.CreateIntCast(
+        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
+        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
+        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
             CGF.getTypeSize(
                 VD->getType().getCanonicalType().getNonReferenceType()),
             CGF.Int64Ty, /*isSigned=*/true));
@@ -8323,12 +8347,13 @@ class MappableExprsHandler {
         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                    VDLVal.getPointer(CGF));
-        BasePointers.push_back(VarLVal.getPointer(CGF));
-        Pointers.push_back(VarRVal.getScalarVal());
-        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
+        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
+        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
+        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
       }
-      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
-                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+      CombinedInfo.Mappers.push_back(nullptr);
     }
   }
 
@@ -8361,13 +8386,10 @@ class MappableExprsHandler {
     }
   }
 
-  /// Generate the base pointers, section pointers, sizes and map types
-  /// associated to a given capture.
+  /// Generate the base pointers, section pointers, sizes, map types, and
+  /// mappers associated to a given capture (all included in \a CombinedInfo).
   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
-                              llvm::Value *Arg,
-                              MapBaseValuesArrayTy &BasePointers,
-                              MapValuesArrayTy &Pointers,
-                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
+                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                               StructRangeInfoTy &PartialStruct) const {
     assert(!Cap->capturesVariableArrayType() &&
            "Not expecting to generate map info for a variable array type!");
@@ -8381,31 +8403,35 @@ class MappableExprsHandler {
     // pass the pointer by value. If it is a reference to a declaration, we just
     // pass its value.
     if (DevPointersMap.count(VD)) {
-      BasePointers.emplace_back(Arg, VD);
-      Pointers.push_back(Arg);
-      Sizes.push_back(
+      CombinedInfo.BasePointers.emplace_back(Arg, VD);
+      CombinedInfo.Pointers.push_back(Arg);
+      CombinedInfo.Sizes.push_back(
           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                     CGF.Int64Ty, /*isSigned=*/true));
-      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
+      CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
+      CombinedInfo.Mappers.push_back(nullptr);
       return;
     }
 
     using MapData =
         std::tuple, bool>;
+                   OpenMPMapClauseKind, ArrayRef, bool,
+                   const ValueDecl *>;
     SmallVector DeclComponentLists;
     assert(CurDir.is() &&
            "Expect a executable directive");
     const auto *CurExecDir = CurDir.get();
     for (const auto *C : CurExecDir->getClausesOfKind()) {
       for (const auto L : C->decl_component_lists(VD)) {
-        assert(L.first == VD &&
-               "We got information for the wrong declaration??");
-        assert(!L.second.empty() &&
+        const ValueDecl *VDecl, *Mapper;
+        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+        std::tie(VDecl, Components, Mapper) = L;
+        assert(VDecl == VD && "We got information for the wrong declaration??");
+        assert(!Components.empty() &&
                "Not expecting declaration with no component lists.");
-        DeclComponentLists.emplace_back(L.second, C->getMapType(),
+        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                         C->getMapTypeModifiers(),
-                                        C->isImplicit());
+                                        C->isImplicit(), Mapper);
       }
     }
 
@@ -8422,11 +8448,12 @@ class MappableExprsHandler {
       OpenMPMapClauseKind MapType;
       ArrayRef MapModifiers;
       bool IsImplicit;
-      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+      const ValueDecl *Mapper;
+      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
       ++Count;
       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
-        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
+        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
         auto CI = Components.rbegin();
         auto CE = Components.rend();
         auto SI = Components1.rbegin();
@@ -8512,14 +8539,15 @@ class MappableExprsHandler {
       OpenMPMapClauseKind MapType;
       ArrayRef MapModifiers;
       bool IsImplicit;
-      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+      const ValueDecl *Mapper;
+      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
       ArrayRef
           OverlappedComponents = Pair.getSecond();
       bool IsFirstComponentList = true;
       generateInfoForComponentList(
-          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
-          Types, PartialStruct, IsFirstComponentList, IsImplicit,
-          /*ForDeviceAddr=*/false, OverlappedComponents);
+          MapType, MapModifiers, Components, CombinedInfo, PartialStruct,
+          IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false,
+          OverlappedComponents);
     }
     // Go through other elements without overlapped elements.
     bool IsFirstComponentList = OverlappedData.empty();
@@ -8528,84 +8556,47 @@ class MappableExprsHandler {
       OpenMPMapClauseKind MapType;
       ArrayRef MapModifiers;
       bool IsImplicit;
-      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+      const ValueDecl *Mapper;
+      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
       auto It = OverlappedData.find(&L);
       if (It == OverlappedData.end())
         generateInfoForComponentList(MapType, MapModifiers, Components,
-                                     BasePointers, Pointers, Sizes, Types,
-                                     PartialStruct, IsFirstComponentList,
-                                     IsImplicit);
+                                     CombinedInfo, PartialStruct,
+                                     IsFirstComponentList, IsImplicit, Mapper);
       IsFirstComponentList = false;
     }
   }
 
-  /// Generate the base pointers, section pointers, sizes and map types
-  /// associated with the declare target link variables.
-  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
-                                        MapValuesArrayTy &Pointers,
-                                        MapValuesArrayTy &Sizes,
-                                        MapFlagsArrayTy &Types) const {
-    assert(CurDir.is() &&
-           "Expect a executable directive");
-    const auto *CurExecDir = CurDir.get();
-    // Map other list items in the map clause which are not captured variables
-    // but "declare target link" global variables.
-    for (const auto *C : CurExecDir->getClausesOfKind()) {
-      for (const auto L : C->component_lists()) {
-        if (!L.first)
-          continue;
-        const auto *VD = dyn_cast(L.first);
-        if (!VD)
-          continue;
-        llvm::Optional Res =
-            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
-        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
-            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
-          continue;
-        StructRangeInfoTy PartialStruct;
-        generateInfoForComponentList(
-            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
-            Pointers, Sizes, Types, PartialStruct,
-            /*IsFirstComponentList=*/true, C->isImplicit());
-        assert(!PartialStruct.Base.isValid() &&
-               "No partial structs for declare target link expected.");
-      }
-    }
-  }
-
   /// Generate the default map information for a given capture \a CI,
   /// record field declaration \a RI and captured value \a CV.
   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                               const FieldDecl &RI, llvm::Value *CV,
-                              MapBaseValuesArrayTy &CurBasePointers,
-                              MapValuesArrayTy &CurPointers,
-                              MapValuesArrayTy &CurSizes,
-                              MapFlagsArrayTy &CurMapTypes) const {
+                              MapCombinedInfoTy &CombinedInfo) const {
     bool IsImplicit = true;
     // Do the default mapping.
     if (CI.capturesThis()) {
-      CurBasePointers.push_back(CV);
-      CurPointers.push_back(CV);
+      CombinedInfo.BasePointers.push_back(CV);
+      CombinedInfo.Pointers.push_back(CV);
       const auto *PtrTy = cast(RI.getType().getTypePtr());
-      CurSizes.push_back(
+      CombinedInfo.Sizes.push_back(
           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                     CGF.Int64Ty, /*isSigned=*/true));
       // Default map type.
-      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
+      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
     } else if (CI.capturesVariableByCopy()) {
-      CurBasePointers.push_back(CV);
-      CurPointers.push_back(CV);
+      CombinedInfo.BasePointers.push_back(CV);
+      CombinedInfo.Pointers.push_back(CV);
       if (!RI.getType()->isAnyPointerType()) {
         // We have to signal to the runtime captures passed by value that are
         // not pointers.
-        CurMapTypes.push_back(OMP_MAP_LITERAL);
-        CurSizes.push_back(CGF.Builder.CreateIntCast(
+        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
+        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
       } else {
         // Pointers are implicitly mapped with a zero size and no flags
         // (other than first map that is added for all implicit maps).
-        CurMapTypes.push_back(OMP_MAP_NONE);
-        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
+        CombinedInfo.Types.push_back(OMP_MAP_NONE);
+        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
       }
       const VarDecl *VD = CI.getCapturedVar();
       auto I = FirstPrivateDecls.find(VD);
@@ -8615,12 +8606,12 @@ class MappableExprsHandler {
       assert(CI.capturesVariable() && "Expected captured reference.");
       const auto *PtrTy = cast(RI.getType().getTypePtr());
       QualType ElementType = PtrTy->getPointeeType();
-      CurSizes.push_back(CGF.Builder.CreateIntCast(
+      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
       // The default map type for a scalar/complex type is 'to' because by
       // default the value doesn't have to be retrieved. For an aggregate
       // type, the default is 'tofrom'.
-      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
+      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
       const VarDecl *VD = CI.getCapturedVar();
       auto I = FirstPrivateDecls.find(VD);
       if (I != FirstPrivateDecls.end() &&
@@ -8631,30 +8622,33 @@ class MappableExprsHandler {
         CGF.Builder.CreateMemCpy(
             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
-            CurSizes.back(), /*IsVolatile=*/false);
+            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
         // Use new global variable as the base pointers.
-        CurBasePointers.push_back(Addr);
-        CurPointers.push_back(Addr);
+        CombinedInfo.BasePointers.push_back(Addr);
+        CombinedInfo.Pointers.push_back(Addr);
       } else {
-        CurBasePointers.push_back(CV);
+        CombinedInfo.BasePointers.push_back(CV);
         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
               CV, ElementType, CGF.getContext().getDeclAlign(VD),
               AlignmentSource::Decl));
-          CurPointers.push_back(PtrAddr.getPointer());
+          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
         } else {
-          CurPointers.push_back(CV);
+          CombinedInfo.Pointers.push_back(CV);
         }
       }
       if (I != FirstPrivateDecls.end())
         IsImplicit = I->getSecond();
     }
     // Every default map produces a single argument which is a target parameter.
-    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
+    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
 
     // Add flag stating this is an implicit map.
     if (IsImplicit)
-      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
+      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
+
+    // No user-defined mapper for default mapping.
+    CombinedInfo.Mappers.push_back(nullptr);
   }
 };
 } // anonymous namespace
@@ -8664,23 +8658,20 @@ class MappableExprsHandler {
 /// return nullptr by reference.
 static void
 emitOffloadingArrays(CodeGenFunction &CGF,
-                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
-                     MappableExprsHandler::MapValuesArrayTy &Pointers,
-                     MappableExprsHandler::MapValuesArrayTy &Sizes,
-                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
+                     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
                      CGOpenMPRuntime::TargetDataInfo &Info) {
   CodeGenModule &CGM = CGF.CGM;
   ASTContext &Ctx = CGF.getContext();
 
   // Reset the array information.
   Info.clearArrayInfo();
-  Info.NumberOfPtrs = BasePointers.size();
+  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
 
   if (Info.NumberOfPtrs) {
     // Detect if we have any capture size requiring runtime evaluation of the
     // size so that a constant array could be eventually used.
     bool hasRuntimeEvaluationCaptureSize = false;
-    for (llvm::Value *S : Sizes)
+    for (llvm::Value *S : CombinedInfo.Sizes)
       if (!isa(S)) {
         hasRuntimeEvaluationCaptureSize = true;
         break;
@@ -8695,6 +8686,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
     Info.PointersArray =
         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
+    Address MappersArray =
+        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
+    Info.MappersArray = MappersArray.getPointer();
 
     // If we don't have any VLA types or other types that require runtime
     // evaluation, we can use a constant array for the map sizes, otherwise we
@@ -8711,7 +8705,7 @@ emitOffloadingArrays(CodeGenFunction &CGF,
       // We expect all the sizes to be constant, so we collect them to create
       // a constant array.
       SmallVector ConstSizes;
-      for (llvm::Value *S : Sizes)
+      for (llvm::Value *S : CombinedInfo.Sizes)
         ConstSizes.push_back(cast(S));
 
       auto *SizesArrayInit = llvm::ConstantArray::get(
@@ -8727,8 +8721,8 @@ emitOffloadingArrays(CodeGenFunction &CGF,
 
     // The map types are always constant so we don't need to generate code to
     // fill arrays. Instead, we create an array constant.
-    SmallVector Mapping(MapTypes.size(), 0);
-    llvm::copy(MapTypes, Mapping.begin());
+    SmallVector Mapping(CombinedInfo.Types.size(), 0);
+    llvm::copy(CombinedInfo.Types, Mapping.begin());
     llvm::Constant *MapTypesArrayInit =
         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
     std::string MaptypesName =
@@ -8741,7 +8735,7 @@ emitOffloadingArrays(CodeGenFunction &CGF,
     Info.MapTypesArray = MapTypesArrayGbl;
 
     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
-      llvm::Value *BPVal = *BasePointers[I];
+      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
           Info.BasePointersArray, 0, I);
@@ -8751,10 +8745,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
       CGF.Builder.CreateStore(BPVal, BPAddr);
 
       if (Info.requiresDevicePointerInfo())
-        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
+        if (const ValueDecl *DevVD =
+                CombinedInfo.BasePointers[I].getDevicePtrDecl())
           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
 
-      llvm::Value *PVal = Pointers[I];
+      llvm::Value *PVal = CombinedInfo.Pointers[I];
       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
           Info.PointersArray, 0, I);
@@ -8770,20 +8765,33 @@ emitOffloadingArrays(CodeGenFunction &CGF,
             /*Idx0=*/0,
             /*Idx1=*/I);
         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
-        CGF.Builder.CreateStore(
-            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
-            SAddr);
+        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
+                                                          CGM.Int64Ty,
+                                                          /*isSigned=*/true),
+                                SAddr);
+      }
+
+      // Fill up the mapper array.
+      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+      if (CombinedInfo.Mappers[I]) {
+        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+            cast(CombinedInfo.Mappers[I]));
+        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
+        Info.HasMapper = true;
       }
+      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
+      CGF.Builder.CreateStore(MFunc, MAddr);
     }
   }
 }
 
 /// Emit the arguments to be passed to the runtime library based on the
-/// arrays of pointers, sizes and map types.
+/// arrays of base pointers, pointers, sizes, map types, and mappers.
 static void emitOffloadingArraysArgument(
     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
-    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
+    llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
+    CGOpenMPRuntime::TargetDataInfo &Info) {
   CodeGenModule &CGM = CGF.CGM;
   if (Info.NumberOfPtrs) {
     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
@@ -8803,12 +8811,17 @@ static void emitOffloadingArraysArgument(
         Info.MapTypesArray,
         /*Idx0=*/0,
         /*Idx1=*/0);
+    MappersArrayArg =
+        Info.HasMapper
+            ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy)
+            : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
   } else {
     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
     MapTypesArrayArg =
         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
+    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
   }
 }
 
@@ -8995,6 +9008,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
       C.getPointerType(Int64Ty), Loc);
+  // Convert the size in bytes into the number of array elements.
+  Size = MapperCGF.Builder.CreateExactUDiv(
+      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
@@ -9033,6 +9049,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
 
   // Emit the loop body block.
   MapperCGF.EmitBlock(BodyBB);
+  llvm::BasicBlock *LastBB = BodyBB;
   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
   PtrPHI->addIncoming(PtrBegin, EntryBB);
@@ -9050,12 +9067,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
   (void)Scope.Privatize();
 
   // Get map clause information. Fill up the arrays with all mapped variables.
-  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
-  MappableExprsHandler::MapValuesArrayTy Pointers;
-  MappableExprsHandler::MapValuesArrayTy Sizes;
-  MappableExprsHandler::MapFlagsArrayTy MapTypes;
+  MappableExprsHandler::MapCombinedInfoTy Info;
   MappableExprsHandler MEHandler(*D, MapperCGF);
-  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
+  MEHandler.generateAllInfoForMapper(Info);
 
   // Call the runtime API __tgt_mapper_num_components to get the number of
   // pre-existing components.
@@ -9069,17 +9083,17 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
 
   // Fill up the runtime mapper handle for all components.
-  for (unsigned I = 0; I < BasePointers.size(); ++I) {
+  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
-        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
-        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
-    llvm::Value *CurSizeArg = Sizes[I];
+        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+    llvm::Value *CurSizeArg = Info.Sizes[I];
 
     // Extract the MEMBER_OF field from the map type.
     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
     MapperCGF.EmitBlock(MemberBB);
-    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
+    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
         OriMapType,
         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
@@ -9155,6 +9169,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
     // In case of tofrom, do nothing.
     MapperCGF.EmitBlock(EndBB);
+    LastBB = EndBB;
     llvm::PHINode *CurMapType =
         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
     CurMapType->addIncoming(AllocMapType, AllocBB);
@@ -9162,23 +9177,29 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
     CurMapType->addIncoming(FromMapType, FromBB);
     CurMapType->addIncoming(MemberMapType, ToElseBB);
 
-    // TODO: call the corresponding mapper function if a user-defined mapper is
-    // associated with this map clause.
-    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
-    // data structure.
     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                      CurSizeArg, CurMapType};
-    MapperCGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
-        OffloadingArgs);
+    if (Info.Mappers[I]) {
+      // Call the corresponding mapper function.
+      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
+          cast(Info.Mappers[I]));
+      assert(MapperFunc && "Expect a valid mapper function is available.");
+      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
+    } else {
+      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+      // data structure.
+      MapperCGF.EmitRuntimeCall(
+          OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
+          OffloadingArgs);
+    }
   }
 
   // Update the pointer to point to the next element that needs to be mapped,
   // and check whether we have mapped all elements.
   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
-  PtrPHI->addIncoming(PtrNext, BodyBB);
+  PtrPHI->addIncoming(PtrNext, LastBB);
   llvm::Value *IsDone =
       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
@@ -9256,6 +9277,15 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
       OffloadingArgs);
 }
 
+llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
+    const OMPDeclareMapperDecl *D) {
+  auto I = UDMMap.find(D);
+  if (I != UDMMap.end())
+    return I->second;
+  emitUserDefinedMapper(D);
+  return UDMMap.lookup(D);
+}
+
 void CGOpenMPRuntime::emitTargetNumIterationsCall(
     CodeGenFunction &CGF, const OMPExecutableDirective &D,
     llvm::Value *DeviceID,
@@ -9401,12 +9431,14 @@ void CGOpenMPRuntime::emitTargetCall(
                                        InputInfo.PointersArray.getPointer(),
                                        InputInfo.SizesArray.getPointer(),
                                        MapTypesArray,
+                                       InputInfo.MappersArray.getPointer(),
                                        NumTeams,
                                        NumThreads};
       Return = CGF.EmitRuntimeCall(
           OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
-                                         : OMPRTL___tgt_target_teams),
+              CGM.getModule(), HasNowait
+                                   ? OMPRTL___tgt_target_teams_nowait_mapper
+                                   : OMPRTL___tgt_target_teams_mapper),
           OffloadingArgs);
     } else {
       llvm::Value *OffloadingArgs[] = {DeviceID,
@@ -9415,11 +9447,12 @@ void CGOpenMPRuntime::emitTargetCall(
                                        InputInfo.BasePointersArray.getPointer(),
                                        InputInfo.PointersArray.getPointer(),
                                        InputInfo.SizesArray.getPointer(),
-                                       MapTypesArray};
+                                       MapTypesArray,
+                                       InputInfo.MappersArray.getPointer()};
       Return = CGF.EmitRuntimeCall(
           OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(),
-              HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
+              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
+                                         : OMPRTL___tgt_target_mapper),
           OffloadingArgs);
     }
 
@@ -9457,92 +9490,87 @@ void CGOpenMPRuntime::emitTargetCall(
                           &CapturedVars, RequiresOuterTask,
                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
     // Fill up the arrays with all the captured variables.
-    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
-    MappableExprsHandler::MapValuesArrayTy Pointers;
-    MappableExprsHandler::MapValuesArrayTy Sizes;
-    MappableExprsHandler::MapFlagsArrayTy MapTypes;
+    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
 
     // Get mappable expression information.
     MappableExprsHandler MEHandler(D, CGF);
     llvm::DenseMap LambdaPointers;
+    llvm::DenseSet> MappedVarSet;
 
     auto RI = CS.getCapturedRecordDecl()->field_begin();
     auto CV = CapturedVars.begin();
     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                               CE = CS.capture_end();
          CI != CE; ++CI, ++RI, ++CV) {
-      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
-      MappableExprsHandler::MapValuesArrayTy CurPointers;
-      MappableExprsHandler::MapValuesArrayTy CurSizes;
-      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
+      MappableExprsHandler::MapCombinedInfoTy CurInfo;
       MappableExprsHandler::StructRangeInfoTy PartialStruct;
 
       // VLA sizes are passed to the outlined region by copy and do not have map
       // information associated.
       if (CI->capturesVariableArrayType()) {
-        CurBasePointers.push_back(*CV);
-        CurPointers.push_back(*CV);
-        CurSizes.push_back(CGF.Builder.CreateIntCast(
+        CurInfo.BasePointers.push_back(*CV);
+        CurInfo.Pointers.push_back(*CV);
+        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
         // Copy to the device as an argument. No need to retrieve it.
-        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
-                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
-                              MappableExprsHandler::OMP_MAP_IMPLICIT);
+        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
+                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
+                                MappableExprsHandler::OMP_MAP_IMPLICIT);
+        CurInfo.Mappers.push_back(nullptr);
       } else {
         // If we have any information in the map clause, we use it, otherwise we
         // just do a default mapping.
-        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
-                                         CurSizes, CurMapTypes, PartialStruct);
-        if (CurBasePointers.empty())
-          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
-                                           CurPointers, CurSizes, CurMapTypes);
+        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+        if (!CI->capturesThis())
+          MappedVarSet.insert(CI->getCapturedVar());
+        else
+          MappedVarSet.insert(nullptr);
+        if (CurInfo.BasePointers.empty())
+          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
         // Generate correct mapping for variables captured by reference in
         // lambdas.
         if (CI->capturesVariable())
-          MEHandler.generateInfoForLambdaCaptures(
-              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
-              CurMapTypes, LambdaPointers);
+          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+                                                  CurInfo, LambdaPointers);
       }
       // We expect to have at least an element of information for this capture.
-      assert(!CurBasePointers.empty() &&
+      assert(!CurInfo.BasePointers.empty() &&
              "Non-existing map pointer for capture!");
-      assert(CurBasePointers.size() == CurPointers.size() &&
-             CurBasePointers.size() == CurSizes.size() &&
-             CurBasePointers.size() == CurMapTypes.size() &&
+      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
              "Inconsistent map information sizes!");
 
       // If there is an entry in PartialStruct it means we have a struct with
       // individual members mapped. Emit an extra combined entry.
       if (PartialStruct.Base.isValid())
-        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
-                                    CurMapTypes, PartialStruct);
+        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
 
       // We need to append the results of this capture to what we already have.
-      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
-      Pointers.append(CurPointers.begin(), CurPointers.end());
-      Sizes.append(CurSizes.begin(), CurSizes.end());
-      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
+      CombinedInfo.append(CurInfo);
     }
     // Adjust MEMBER_OF flags for the lambdas captures.
-    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
-                                              Pointers, MapTypes);
-    // Map other list items in the map clause which are not captured variables
-    // but "declare target link" global variables.
-    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
-                                               MapTypes);
+    MEHandler.adjustMemberOfForLambdaCaptures(
+        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
+        CombinedInfo.Types);
+    // Map any list items in a map clause that were not captures because they
+    // weren't referenced within the construct.
+    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
 
     TargetDataInfo Info;
     // Fill up the arrays and create the arguments.
-    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+    emitOffloadingArrays(CGF, CombinedInfo, Info);
     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                  Info.PointersArray, Info.SizesArray,
-                                 Info.MapTypesArray, Info);
+                                 Info.MapTypesArray, Info.MappersArray, Info);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray =
         Address(Info.BasePointersArray, CGM.getPointerAlign());
     InputInfo.PointersArray =
         Address(Info.PointersArray, CGM.getPointerAlign());
     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
+    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
     MapTypesArray = Info.MapTypesArray;
     if (RequiresOuterTask)
       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
@@ -10131,24 +10159,23 @@ void CGOpenMPRuntime::emitTargetDataCalls(
   auto &&BeginThenGen = [this, &D, Device, &Info,
                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
-    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
-    MappableExprsHandler::MapValuesArrayTy Pointers;
-    MappableExprsHandler::MapValuesArrayTy Sizes;
-    MappableExprsHandler::MapFlagsArrayTy MapTypes;
+    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
 
     // Get map clause information.
-    MappableExprsHandler MCHandler(D, CGF);
-    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
+    MappableExprsHandler MEHandler(D, CGF);
+    MEHandler.generateAllInfo(CombinedInfo);
 
     // Fill up the arrays and create the arguments.
-    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+    emitOffloadingArrays(CGF, CombinedInfo, Info);
 
     llvm::Value *BasePointersArrayArg = nullptr;
     llvm::Value *PointersArrayArg = nullptr;
     llvm::Value *SizesArrayArg = nullptr;
     llvm::Value *MapTypesArrayArg = nullptr;
+    llvm::Value *MappersArrayArg = nullptr;
     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
-                                 SizesArrayArg, MapTypesArrayArg, Info);
+                                 SizesArrayArg, MapTypesArrayArg,
+                                 MappersArrayArg, Info);
 
     // Emit device ID if any.
     llvm::Value *DeviceID = nullptr;
@@ -10163,11 +10190,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
 
     llvm::Value *OffloadingArgs[] = {
-        DeviceID,         PointerNum,    BasePointersArrayArg,
-        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
-    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
-                        OffloadingArgs);
+        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
+        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
+    CGF.EmitRuntimeCall(
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
+        OffloadingArgs);
 
     // If device pointer privatization is required, emit the body of the region
     // here. It will have to be duplicated: with and without privatization.
@@ -10184,8 +10212,10 @@ void CGOpenMPRuntime::emitTargetDataCalls(
     llvm::Value *PointersArrayArg = nullptr;
     llvm::Value *SizesArrayArg = nullptr;
     llvm::Value *MapTypesArrayArg = nullptr;
+    llvm::Value *MappersArrayArg = nullptr;
     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
-                                 SizesArrayArg, MapTypesArrayArg, Info);
+                                 SizesArrayArg, MapTypesArrayArg,
+                                 MappersArrayArg, Info);
 
     // Emit device ID if any.
     llvm::Value *DeviceID = nullptr;
@@ -10200,11 +10230,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
 
     llvm::Value *OffloadingArgs[] = {
-        DeviceID,         PointerNum,    BasePointersArrayArg,
-        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
-    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                            CGM.getModule(), OMPRTL___tgt_target_data_end),
-                        OffloadingArgs);
+        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
+        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
+    CGF.EmitRuntimeCall(
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
+        OffloadingArgs);
   };
 
   // If we need device pointer privatization, we need to emit the body of the
@@ -10278,24 +10309,25 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                                      InputInfo.BasePointersArray.getPointer(),
                                      InputInfo.PointersArray.getPointer(),
                                      InputInfo.SizesArray.getPointer(),
-                                     MapTypesArray};
+                                     MapTypesArray,
+                                     InputInfo.MappersArray.getPointer()};
 
-    // Select the right runtime function call for each expected standalone
+    // Select the right runtime function call for each standalone
     // directive.
     const bool HasNowait = D.hasClausesOfKind();
     RuntimeFunction RTLFn;
     switch (D.getDirectiveKind()) {
     case OMPD_target_enter_data:
-      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
-                        : OMPRTL___tgt_target_data_begin;
+      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
+                        : OMPRTL___tgt_target_data_begin_mapper;
       break;
     case OMPD_target_exit_data:
-      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
-                        : OMPRTL___tgt_target_data_end;
+      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
+                        : OMPRTL___tgt_target_data_end_mapper;
       break;
     case OMPD_target_update:
-      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
-                        : OMPRTL___tgt_target_data_update;
+      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
+                        : OMPRTL___tgt_target_data_update_mapper;
       break;
     case OMPD_parallel:
     case OMPD_for:
@@ -10372,21 +10404,18 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                              CodeGenFunction &CGF, PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
-    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
-    MappableExprsHandler::MapValuesArrayTy Pointers;
-    MappableExprsHandler::MapValuesArrayTy Sizes;
-    MappableExprsHandler::MapFlagsArrayTy MapTypes;
+    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
 
     // Get map clause information.
     MappableExprsHandler MEHandler(D, CGF);
-    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
+    MEHandler.generateAllInfo(CombinedInfo);
 
     TargetDataInfo Info;
     // Fill up the arrays and create the arguments.
-    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+    emitOffloadingArrays(CGF, CombinedInfo, Info);
     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                  Info.PointersArray, Info.SizesArray,
-                                 Info.MapTypesArray, Info);
+                                 Info.MapTypesArray, Info.MappersArray, Info);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray =
         Address(Info.BasePointersArray, CGM.getPointerAlign());
@@ -10394,6 +10423,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
         Address(Info.PointersArray, CGM.getPointerAlign());
     InputInfo.SizesArray =
         Address(Info.SizesArray, CGM.getPointerAlign());
+    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
     MapTypesArray = Info.MapTypesArray;
     if (D.hasClausesOfKind())
       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index eb22f155f5ef4..0b91975343f70 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -912,6 +912,10 @@ class CGOpenMPRuntime {
   /// Emit the function for the user defined mapper construct.
   void emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                              CodeGenFunction *CGF = nullptr);
+  /// Get the function for the specified user-defined mapper. If it does not
+  /// exist, create one.
+  llvm::Function *
+  getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D);
 
   /// Emits outlined function for the specified OpenMP parallel directive
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
@@ -1620,6 +1624,10 @@ class CGOpenMPRuntime {
     llvm::Value *SizesArray = nullptr;
     /// The array of map types passed to the runtime library.
     llvm::Value *MapTypesArray = nullptr;
+    /// The array of user-defined mappers passed to the runtime library.
+    llvm::Value *MappersArray = nullptr;
+    /// Indicate whether any user-defined mapper exists.
+    bool HasMapper = false;
     /// The total number of pointers passed to the runtime library.
     unsigned NumberOfPtrs = 0u;
     /// Map between the a declaration of a capture and the corresponding base
@@ -1635,12 +1643,14 @@ class CGOpenMPRuntime {
       PointersArray = nullptr;
       SizesArray = nullptr;
       MapTypesArray = nullptr;
+      MappersArray = nullptr;
+      HasMapper = false;
       NumberOfPtrs = 0u;
     }
     /// Return true if the current target data information has valid arrays.
     bool isValid() {
       return BasePointersArray && PointersArray && SizesArray &&
-             MapTypesArray && NumberOfPtrs;
+             MapTypesArray && (!HasMapper || MappersArray) && NumberOfPtrs;
     }
     bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
   };
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
new file mode 100644
index 0000000000000..92eca33ee97d6
--- /dev/null
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -0,0 +1,5230 @@
+//===---- CGOpenMPRuntimeGPU.cpp - Interface to OpenMP GPU Runtimes ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a generalized class for OpenMP runtime code generation
+// specialized by GPU target NVPTX.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGOpenMPRuntimeGPU.h"
+#include "CGOpenMPRuntimeNVPTX.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Cuda.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+
+using namespace clang;
+using namespace CodeGen;
+using namespace llvm::omp;
+
+namespace {
+/// NVPTX-specific OpenMP runtime entry points emitted by this file.
+/// NOTE(review): each enumerator appears to correspond to a function in the
+/// NVPTX device runtime — confirm signatures against libomptarget-nvptx.
+enum OpenMPRTLFunctionNVPTX {
+  /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
+  /// int16_t RequiresOMPRuntime);
+  OMPRTL_NVPTX__kmpc_kernel_init,
+  /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+  OMPRTL_NVPTX__kmpc_kernel_deinit,
+  /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+  /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
+  /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
+  /// Call to void __kmpc_kernel_prepare_parallel(void
+  /// *outlined_function);
+  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
+  /// Call to bool __kmpc_kernel_parallel(void **outlined_function);
+  OMPRTL_NVPTX__kmpc_kernel_parallel,
+  /// Call to void __kmpc_kernel_end_parallel();
+  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
+  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL_NVPTX__kmpc_serialized_parallel,
+  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
+  /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
+  /// int16_t lane_offset, int16_t warp_size);
+  OMPRTL_NVPTX__kmpc_shuffle_int32,
+  /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
+  /// int16_t lane_offset, int16_t warp_size);
+  OMPRTL_NVPTX__kmpc_shuffle_int64,
+  /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+  /// lane_offset, int16_t shortCircuit),
+  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+  OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
+  /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+  /// global_tid, void *global_buffer, int32_t num_of_records, void*
+  /// reduce_data,
+  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+  /// lane_offset, int16_t shortCircuit),
+  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+  /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+  /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+  /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+  /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+  /// *buffer, int idx, void *reduce_data));
+  OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
+  /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+  OMPRTL_NVPTX__kmpc_end_reduce_nowait,
+  /// Call to void __kmpc_data_sharing_init_stack();
+  OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
+  /// Call to void __kmpc_data_sharing_init_stack_spmd();
+  OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
+  /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
+  /// int16_t UseSharedMemory);
+  OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
+  /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
+  /// UseSharedMemory);
+  OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
+  /// Call to void __kmpc_data_sharing_pop_stack(void *a);
+  OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
+  /// Call to void __kmpc_begin_sharing_variables(void ***args,
+  /// size_t n_args);
+  OMPRTL_NVPTX__kmpc_begin_sharing_variables,
+  /// Call to void __kmpc_end_sharing_variables();
+  OMPRTL_NVPTX__kmpc_end_sharing_variables,
+  /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
+  OMPRTL_NVPTX__kmpc_get_shared_variables,
+  /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL_NVPTX__kmpc_parallel_level,
+  /// Call to int8_t __kmpc_is_spmd_exec_mode();
+  OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
+  /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+  /// const void *buf, size_t size, int16_t is_shared, const void **res);
+  OMPRTL_NVPTX__kmpc_get_team_static_memory,
+  /// Call to void __kmpc_restore_team_static_memory(int16_t
+  /// isSPMDExecutionMode, int16_t is_shared);
+  OMPRTL_NVPTX__kmpc_restore_team_static_memory,
+  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+  OMPRTL__kmpc_barrier,
+  /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  /// Call to void __kmpc_syncwarp(int32_t Mask);
+  OMPRTL_NVPTX__kmpc_syncwarp,
+};
+
+/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
+class NVPTXActionTy final : public PrePostActionTy {
+  // Runtime entry point (and its arguments) invoked when entering the region.
+  llvm::FunctionCallee EnterCallee = nullptr;
+  ArrayRef<llvm::Value *> EnterArgs;
+  // Runtime entry point (and its arguments) invoked when leaving the region.
+  llvm::FunctionCallee ExitCallee = nullptr;
+  ArrayRef<llvm::Value *> ExitArgs;
+  // If true, the region body is only emitted when the enter call returns
+  // non-zero.
+  bool Conditional = false;
+  llvm::BasicBlock *ContBlock = nullptr;
+
+public:
+  NVPTXActionTy(llvm::FunctionCallee EnterCallee,
+                ArrayRef<llvm::Value *> EnterArgs,
+                llvm::FunctionCallee ExitCallee,
+                ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
+      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
+        ExitArgs(ExitArgs), Conditional(Conditional) {}
+  void Enter(CodeGenFunction &CGF) override {
+    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
+    if (Conditional) {
+      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
+      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
+      ContBlock = CGF.createBasicBlock("omp_if.end");
+      // Generate the branch (If-stmt)
+      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
+      CGF.EmitBlock(ThenBlock);
+    }
+  }
+  void Done(CodeGenFunction &CGF) {
+    // Emit the rest of blocks/branches
+    CGF.EmitBranch(ContBlock);
+    CGF.EmitBlock(ContBlock, true);
+  }
+  void Exit(CodeGenFunction &CGF) override {
+    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
+  }
+};
+
+/// A class to track the execution mode when codegening directives within
+/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry
+/// to the target region and used by containing directives such as 'parallel'
+/// to emit optimized code.
+class ExecutionRuntimeModesRAII {
+private:
+  CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode =
+      CGOpenMPRuntimeGPU::EM_Unknown;
+  CGOpenMPRuntimeGPU::ExecutionMode &ExecMode;
+  bool SavedRuntimeMode = false;
+  // Member pointer to the caller's runtime-mode flag; null in non-SPMD mode.
+  bool *RuntimeMode = nullptr;
+
+public:
+  /// Constructor for Non-SPMD mode.
+  ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode)
+      : ExecMode(ExecMode) {
+    SavedExecMode = ExecMode;
+    ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD;
+  }
+  /// Constructor for SPMD mode.
+  // NOTE: the `RuntimeMode` parameter (a bool&) shadows the member pointer of
+  // the same name inside this body; the member is bound to its address in the
+  // init list, and the assignments below go through the reference parameter.
+  ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode,
+                            bool &RuntimeMode, bool FullRuntimeMode)
+      : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+    SavedExecMode = ExecMode;
+    SavedRuntimeMode = RuntimeMode;
+    ExecMode = CGOpenMPRuntimeGPU::EM_SPMD;
+    RuntimeMode = FullRuntimeMode;
+  }
+  // Restore both flags to the values observed at construction.
+  ~ExecutionRuntimeModesRAII() {
+    ExecMode = SavedExecMode;
+    if (RuntimeMode)
+      *RuntimeMode = SavedRuntimeMode;
+  }
+};
+
+/// GPU Configuration:  This information can be derived from cuda registers,
+/// however, providing compile time constants helps generate more efficient
+/// code.  For all practical purposes this is fine because the configuration
+/// is the same for all known NVPTX architectures.
+enum MachineConfiguration : unsigned {
+  // Threads per warp; constant on all known NVPTX architectures.
+  WarpSize = 32,
+  /// Number of bits required to represent a lane identifier, which is
+  /// computed as log_2(WarpSize).
+  LaneIDBits = 5,
+  // Mask extracting the lane id from a thread id (tid & LaneIDMask).
+  LaneIDMask = WarpSize - 1,
+
+  /// Global memory alignment for performance.
+  GlobalMemoryAlignment = 128,
+
+  /// Maximal size of the shared memory buffer.
+  SharedMemorySize = 128,
+};
+
+/// Strip array-subscript and OpenMP array-section wrappers from \p RefExpr
+/// and return the canonical declaration of the underlying variable or member.
+static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
+  RefExpr = RefExpr->IgnoreParens();
+  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
+    // Peel nested subscripts: a[i][j] -> a.
+    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    RefExpr = Base;
+  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
+    // Peel nested array sections, then any remaining subscripts.
+    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+      Base = TempOASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    RefExpr = Base;
+  }
+  RefExpr = RefExpr->IgnoreParenImpCasts();
+  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
+    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
+  const auto *ME = cast<MemberExpr>(RefExpr);
+  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
+}
+
+
+/// Build an implicit record type holding every globalized variable.
+/// \p EscapedDecls get one slot per warp lane (BufSize entries, aligned to
+/// GlobalMemoryAlignment); \p EscapedDeclsForTeams get a single slot each.
+/// Fills \p MappedDeclsFields with the field created for each declaration.
+static RecordDecl *buildRecordForGlobalizedVars(
+    ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+    ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
+    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &MappedDeclsFields, int BufSize) {
+  using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>;
+  if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
+    return nullptr;
+  SmallVector<VarsDataTy, 4> GlobalizedVars;
+  for (const ValueDecl *D : EscapedDecls)
+    GlobalizedVars.emplace_back(
+        CharUnits::fromQuantity(std::max(
+            C.getDeclAlign(D).getQuantity(),
+            static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
+        D);
+  for (const ValueDecl *D : EscapedDeclsForTeams)
+    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
+  // Lay out the most-aligned variables first; stable to keep declaration
+  // order deterministic among equal alignments.
+  llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
+    return L.first > R.first;
+  });
+
+  // Build struct _globalized_locals_ty {
+  //         /*  globalized vars  */[WarpSize] align (max(decl_align,
+  //         GlobalMemoryAlignment))
+  //         /*  globalized vars  */ for EscapedDeclsForTeams
+  //       };
+  RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
+  GlobalizedRD->startDefinition();
+  llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
+      EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
+  for (const auto &Pair : GlobalizedVars) {
+    const ValueDecl *VD = Pair.second;
+    QualType Type = VD->getType();
+    if (Type->isLValueReferenceType())
+      Type = C.getPointerType(Type.getNonReferenceType());
+    else
+      Type = Type.getNonReferenceType();
+    SourceLocation Loc = VD->getLocation();
+    FieldDecl *Field;
+    if (SingleEscaped.count(VD)) {
+      // Team-level variable: one copy, original alignment attributes kept.
+      Field = FieldDecl::Create(
+          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+          C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+          /*BW=*/nullptr, /*Mutable=*/false,
+          /*InitStyle=*/ICIS_NoInit);
+      Field->setAccess(AS_public);
+      if (VD->hasAttrs()) {
+        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+             E(VD->getAttrs().end());
+             I != E; ++I)
+          Field->addAttr(*I);
+      }
+    } else {
+      // Parallel-level variable: one copy per lane (BufSize entries),
+      // over-aligned for coalesced global-memory access.
+      llvm::APInt ArraySize(32, BufSize);
+      Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal,
+                                    0);
+      Field = FieldDecl::Create(
+          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+          C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+          /*BW=*/nullptr, /*Mutable=*/false,
+          /*InitStyle=*/ICIS_NoInit);
+      Field->setAccess(AS_public);
+      llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
+                                     static_cast<CharUnits::QuantityType>(
+                                         GlobalMemoryAlignment)));
+      Field->addAttr(AlignedAttr::CreateImplicit(
+          C, /*IsAlignmentExpr=*/true,
+          IntegerLiteral::Create(C, Align,
+                                 C.getIntTypeForBitwidth(32, /*Signed=*/0),
+                                 SourceLocation()),
+          {}, AttributeCommonInfo::AS_GNU, AlignedAttr::GNU_aligned));
+    }
+    GlobalizedRD->addDecl(Field);
+    MappedDeclsFields.try_emplace(VD, Field);
+  }
+  GlobalizedRD->completeDefinition();
+  return GlobalizedRD;
+}
+
+/// Get the list of variables that can escape their declaration context.
+class CheckVarsEscapingDeclContext final
+    : public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
+  CodeGenFunction &CGF;
+  llvm::SetVector<const ValueDecl *> EscapedDecls;
+  llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
+  llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
+  RecordDecl *GlobalizedRD = nullptr;
+  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+  bool AllEscaped = false;
+  bool IsForCombinedParallelRegion = false;
+
+  void markAsEscaped(const ValueDecl *VD) {
+    // Do not globalize declare target variables.
+    if (!isa<VarDecl>(VD) ||
+        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
+      return;
+    VD = cast<ValueDecl>(VD->getCanonicalDecl());
+    // Use user-specified allocation.
+    if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>())
+      return;
+    // Variables captured by value must be globalized.
+    if (auto *CSI = CGF.CapturedStmtInfo) {
+      if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
+        // Check if need to capture the variable that was already captured by
+        // value in the outer region.
+        if (!IsForCombinedParallelRegion) {
+          if (!FD->hasAttrs())
+            return;
+          const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
+          if (!Attr)
+            return;
+          if (((Attr->getCaptureKind() != OMPC_map) &&
+               !isOpenMPPrivate(Attr->getCaptureKind())) ||
+              ((Attr->getCaptureKind() == OMPC_map) &&
+               !FD->getType()->isAnyPointerType()))
+            return;
+        }
+        if (!FD->getType()->isReferenceType()) {
+          assert(!VD->getType()->isVariablyModifiedType() &&
+                 "Parameter captured by value with variably modified type");
+          EscapedParameters.insert(VD);
+        } else if (!IsForCombinedParallelRegion) {
+          return;
+        }
+      }
+    }
+    if ((!CGF.CapturedStmtInfo ||
+         (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
+        VD->getType()->isReferenceType())
+      // Do not globalize variables with reference type.
+      return;
+    if (VD->getType()->isVariablyModifiedType())
+      EscapedVariableLengthDecls.insert(VD);
+    else
+      EscapedDecls.insert(VD);
+  }
+
+  void VisitValueDecl(const ValueDecl *VD) {
+    if (VD->getType()->isLValueReferenceType())
+      markAsEscaped(VD);
+    if (const auto *VarD = dyn_cast<VarDecl>(VD)) {
+      if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
+        const bool SavedAllEscaped = AllEscaped;
+        AllEscaped = VD->getType()->isLValueReferenceType();
+        Visit(VarD->getInit());
+        AllEscaped = SavedAllEscaped;
+      }
+    }
+  }
+  void VisitOpenMPCapturedStmt(const CapturedStmt *S,
+                               ArrayRef<OMPClause *> Clauses,
+                               bool IsCombinedParallelRegion) {
+    if (!S)
+      return;
+    for (const CapturedStmt::Capture &C : S->captures()) {
+      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
+        const ValueDecl *VD = C.getCapturedVar();
+        bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
+        if (IsCombinedParallelRegion) {
+          // Check if the variable is privatized in the combined construct and
+          // those private copies must be shared in the inner parallel
+          // directive.
+          IsForCombinedParallelRegion = false;
+          for (const OMPClause *C : Clauses) {
+            if (!isOpenMPPrivate(C->getClauseKind()) ||
+                C->getClauseKind() == OMPC_reduction ||
+                C->getClauseKind() == OMPC_linear ||
+                C->getClauseKind() == OMPC_private)
+              continue;
+            ArrayRef<const Expr *> Vars;
+            if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
+              Vars = PC->getVarRefs();
+            else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C))
+              Vars = PC->getVarRefs();
+            else
+              llvm_unreachable("Unexpected clause.");
+            for (const auto *E : Vars) {
+              const Decl *D =
+                  cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
+              if (D == VD->getCanonicalDecl()) {
+                IsForCombinedParallelRegion = true;
+                break;
+              }
+            }
+            if (IsForCombinedParallelRegion)
+              break;
+          }
+        }
+        markAsEscaped(VD);
+        if (isa<OMPCapturedExprDecl>(VD))
+          VisitValueDecl(VD);
+        IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
+      }
+    }
+  }
+
+  void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
+    assert(!GlobalizedRD &&
+           "Record for globalized variables is built already.");
+    ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
+    if (IsInTTDRegion)
+      EscapedDeclsForTeams = EscapedDecls.getArrayRef();
+    else
+      EscapedDeclsForParallel = EscapedDecls.getArrayRef();
+    GlobalizedRD = ::buildRecordForGlobalizedVars(
+        CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
+        MappedDeclsFields, WarpSize);
+  }
+
+public:
+  CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
+                               ArrayRef<const ValueDecl *> TeamsReductions)
+      : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
+  }
+  virtual ~CheckVarsEscapingDeclContext() = default;
+  void VisitDeclStmt(const DeclStmt *S) {
+    if (!S)
+      return;
+    for (const Decl *D : S->decls())
+      if (const auto *VD = dyn_cast_or_null<ValueDecl>(D))
+        VisitValueDecl(VD);
+  }
+  void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
+    if (!D)
+      return;
+    if (!D->hasAssociatedStmt())
+      return;
+    if (const auto *S =
+            dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) {
+      // Do not analyze directives that do not actually require capturing,
+      // like `omp for` or `omp simd` directives.
+      llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+      getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());
+      if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
+        VisitStmt(S->getCapturedStmt());
+        return;
+      }
+      VisitOpenMPCapturedStmt(
+          S, D->clauses(),
+          CaptureRegions.back() == OMPD_parallel &&
+              isOpenMPDistributeDirective(D->getDirectiveKind()));
+    }
+  }
+  void VisitCapturedStmt(const CapturedStmt *S) {
+    if (!S)
+      return;
+    for (const CapturedStmt::Capture &C : S->captures()) {
+      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
+        const ValueDecl *VD = C.getCapturedVar();
+        markAsEscaped(VD);
+        if (isa<OMPCapturedExprDecl>(VD))
+          VisitValueDecl(VD);
+      }
+    }
+  }
+  void VisitLambdaExpr(const LambdaExpr *E) {
+    if (!E)
+      return;
+    for (const LambdaCapture &C : E->captures()) {
+      if (C.capturesVariable()) {
+        if (C.getCaptureKind() == LCK_ByRef) {
+          const ValueDecl *VD = C.getCapturedVar();
+          markAsEscaped(VD);
+          if (E->isInitCapture(&C) || isa<OMPCapturedExprDecl>(VD))
+            VisitValueDecl(VD);
+        }
+      }
+    }
+  }
+  void VisitBlockExpr(const BlockExpr *E) {
+    if (!E)
+      return;
+    for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) {
+      if (C.isByRef()) {
+        const VarDecl *VD = C.getVariable();
+        markAsEscaped(VD);
+        if (isa<OMPCapturedExprDecl>(VD) || VD->isInitCapture())
+          VisitValueDecl(VD);
+      }
+    }
+  }
+  void VisitCallExpr(const CallExpr *E) {
+    if (!E)
+      return;
+    for (const Expr *Arg : E->arguments()) {
+      if (!Arg)
+        continue;
+      if (Arg->isLValue()) {
+        const bool SavedAllEscaped = AllEscaped;
+        AllEscaped = true;
+        Visit(Arg);
+        AllEscaped = SavedAllEscaped;
+      } else {
+        Visit(Arg);
+      }
+    }
+    Visit(E->getCallee());
+  }
+  void VisitDeclRefExpr(const DeclRefExpr *E) {
+    if (!E)
+      return;
+    const ValueDecl *VD = E->getDecl();
+    if (AllEscaped)
+      markAsEscaped(VD);
+    if (isa<OMPCapturedExprDecl>(VD))
+      VisitValueDecl(VD);
+    else if (const auto *VarD = dyn_cast<VarDecl>(VD))
+      if (VarD->isInitCapture())
+        VisitValueDecl(VD);
+  }
+  void VisitUnaryOperator(const UnaryOperator *E) {
+    if (!E)
+      return;
+    if (E->getOpcode() == UO_AddrOf) {
+      const bool SavedAllEscaped = AllEscaped;
+      AllEscaped = true;
+      Visit(E->getSubExpr());
+      AllEscaped = SavedAllEscaped;
+    } else {
+      Visit(E->getSubExpr());
+    }
+  }
+  void VisitImplicitCastExpr(const ImplicitCastExpr *E) {
+    if (!E)
+      return;
+    if (E->getCastKind() == CK_ArrayToPointerDecay) {
+      const bool SavedAllEscaped = AllEscaped;
+      AllEscaped = true;
+      Visit(E->getSubExpr());
+      AllEscaped = SavedAllEscaped;
+    } else {
+      Visit(E->getSubExpr());
+    }
+  }
+  void VisitExpr(const Expr *E) {
+    if (!E)
+      return;
+    bool SavedAllEscaped = AllEscaped;
+    if (!E->isLValue())
+      AllEscaped = false;
+    for (const Stmt *Child : E->children())
+      if (Child)
+        Visit(Child);
+    AllEscaped = SavedAllEscaped;
+  }
+  void VisitStmt(const Stmt *S) {
+    if (!S)
+      return;
+    for (const Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  /// Returns the record that handles all the escaped local variables and used
+  /// instead of their original storage.
+  const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
+    if (!GlobalizedRD)
+      buildRecordForGlobalizedVars(IsInTTDRegion);
+    return GlobalizedRD;
+  }
+
+  /// Returns the field in the globalized record for the escaped variable.
+  const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
+    assert(GlobalizedRD &&
+           "Record for globalized variables must be generated already.");
+    auto I = MappedDeclsFields.find(VD);
+    if (I == MappedDeclsFields.end())
+      return nullptr;
+    return I->getSecond();
+  }
+
+  /// Returns the list of the escaped local variables/parameters.
+  ArrayRef<const ValueDecl *> getEscapedDecls() const {
+    return EscapedDecls.getArrayRef();
+  }
+
+  /// Checks if the escaped local variable is actually a parameter passed by
+  /// value.
+  const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
+    return EscapedParameters;
+  }
+
+  /// Returns the list of the escaped variables with the variably modified
+  /// types.
+  ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const {
+    return EscapedVariableLengthDecls.getArrayRef();
+  }
+};
+} // anonymous namespace
+
+/// Get the id of the current thread on the GPU.
+static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
+  // Read the %tid.x special register via the NVVM intrinsic.
+  llvm::Function *Intrin = llvm::Intrinsic::getDeclaration(
+      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x);
+  return CGF.EmitRuntimeCall(Intrin, "nvptx_tid");
+}
+
+/// Get the id of the warp in the block.
+/// We assume that the warp size is 32, which is always the case
+/// on the NVPTX device, to generate more efficient code.
+static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
+  // tid >> log2(WarpSize) yields the warp number within the block.
+  return CGF.Builder.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits,
+                                "nvptx_warp_id");
+}
+
+/// Get the id of the current lane in the Warp.
+/// We assume that the warp size is 32, which is always the case
+/// on the NVPTX device, to generate more efficient code.
+static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
+  // tid & (WarpSize - 1) yields the lane within the warp.
+  CGBuilderTy &Builder = CGF.Builder;
+  llvm::Value *Mask = Builder.getInt32(LaneIDMask);
+  return Builder.CreateAnd(getNVPTXThreadID(CGF), Mask, "nvptx_lane_id");
+}
+
+/// Get the maximum number of threads in a block of the GPU.
+static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
+  // Read the %ntid.x special register via the NVVM intrinsic.
+  llvm::Function *Intrin = llvm::Intrinsic::getDeclaration(
+      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x);
+  return CGF.EmitRuntimeCall(Intrin, "nvptx_num_threads");
+}
+
+/// Get the value of the thread_limit clause in the teams directive.
+/// For the 'generic' execution mode, the runtime encodes thread_limit in
+/// the launch parameters, always starting thread_limit+warpSize threads per
+/// CTA. The threads in the last warp are reserved for master execution.
+/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
+/// In SPMD mode every thread in the block is part of the team, so the limit
+/// is simply ntid.x; in generic mode the last warp is reserved for the
+/// master, so the limit is ntid.x - WarpSize.
+static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
+                                   bool IsInSPMDExecutionMode = false) {
+  CGBuilderTy &Bld = CGF.Builder;
+  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+  return IsInSPMDExecutionMode
+             ? getNVPTXNumThreads(CGF)
+             : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), RT.getGPUWarpSize(CGF),
+                                "thread_limit");
+}
+
+/// Get the thread id of the OMP master thread.
+/// The master thread id is the first thread (lane) of the last warp in the
+/// GPU block.  Warp size is assumed to be some power of 2.
+/// Thread id is 0 indexed.
+/// E.g: If NumThreads is 33, master id is 32.
+///      If NumThreads is 64, master id is 32.
+///      If NumThreads is 1024, master id is 992.
+/// Compute the master thread id: the first lane of the last warp, i.e.
+/// (NumThreads - 1) rounded down to a multiple of the warp size.
+static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
+  CGBuilderTy &Bld = CGF.Builder;
+  llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
+  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+  // We assume that the warp size is a power of 2.
+  llvm::Value *Mask = Bld.CreateNUWSub(RT.getGPUWarpSize(CGF), Bld.getInt32(1));
+
+  return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
+                       Bld.CreateNot(Mask), "master_tid");
+}
+
+// Set up the state for a target-region worker: the llvm::Function is created
+// eagerly in the constructor so it can be referenced before its body is
+// emitted.
+CGOpenMPRuntimeGPU::WorkerFunctionState::WorkerFunctionState(
+    CodeGenModule &CGM, SourceLocation Loc)
+    : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()),
+      Loc(Loc) {
+  createWorkerFunction(CGM);
+}
+
+void CGOpenMPRuntimeGPU::WorkerFunctionState::createWorkerFunction(
+    CodeGenModule &CGM) {
+  // Create a worker function with no arguments.
+
+  // "_worker" is a placeholder name; internal linkage keeps it local to the
+  // module.
+  WorkerFn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      /*placeholder=*/"_worker", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI);
+  WorkerFn->setDoesNotRecurse();
+}
+
+// Return the execution mode (SPMD / non-SPMD / unknown) of the target region
+// currently being emitted.
+CGOpenMPRuntimeGPU::ExecutionMode
+CGOpenMPRuntimeGPU::getExecutionMode() const {
+  return CurrentExecutionMode;
+}
+
+static CGOpenMPRuntimeGPU::DataSharingMode
+getDataSharingMode(CodeGenModule &CGM) {
+  // -fopenmp-cuda-mode selects the CUDA data-sharing scheme.
+  if (CGM.getLangOpts().OpenMPCUDAMode)
+    return CGOpenMPRuntimeGPU::CUDA;
+  return CGOpenMPRuntimeGPU::Generic;
+}
+
+/// Check for inner (nested) SPMD construct, if any
+/// Check for inner (nested) SPMD construct, if any: a `target` or
+/// `target teams` directive qualifies for SPMD codegen when its (possibly
+/// teams-wrapped) single child is a parallel directive.
+static bool hasNestedSPMDDirective(ASTContext &Ctx,
+                                   const OMPExecutableDirective &D) {
+  const auto *CS = D.getInnermostCapturedStmt();
+  const auto *Body =
+      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+
+  if (const auto *NestedDir =
+          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+    switch (D.getDirectiveKind()) {
+    case OMPD_target:
+      if (isOpenMPParallelDirective(DKind))
+        return true;
+      if (DKind == OMPD_teams) {
+        // Look one level deeper: target -> teams -> parallel.
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+            /*IgnoreCaptured=*/true);
+        if (!Body)
+          return false;
+        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+        if (const auto *NND =
+                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if (isOpenMPParallelDirective(DKind))
+            return true;
+        }
+      }
+      return false;
+    case OMPD_target_teams:
+      return isOpenMPParallelDirective(DKind);
+    case OMPD_target_simd:
+    case OMPD_target_parallel:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_parallel:
+    case OMPD_for:
+    case OMPD_parallel_for:
+    case OMPD_parallel_master:
+    case OMPD_parallel_sections:
+    case OMPD_for_simd:
+    case OMPD_parallel_for_simd:
+    case OMPD_cancel:
+    case OMPD_cancellation_point:
+    case OMPD_ordered:
+    case OMPD_threadprivate:
+    case OMPD_allocate:
+    case OMPD_task:
+    case OMPD_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_taskgroup:
+    case OMPD_atomic:
+    case OMPD_flush:
+    case OMPD_depobj:
+    case OMPD_scan:
+    case OMPD_teams:
+    case OMPD_target_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_enter_data:
+    case OMPD_distribute:
+    case OMPD_distribute_simd:
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_update:
+    case OMPD_declare_simd:
+    case OMPD_declare_variant:
+    case OMPD_begin_declare_variant:
+    case OMPD_end_declare_variant:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_declare_reduction:
+    case OMPD_declare_mapper:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_master_taskloop:
+    case OMPD_master_taskloop_simd:
+    case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
+    case OMPD_requires:
+    case OMPD_unknown:
+    default:
+      llvm_unreachable("Unexpected directive.");
+    }
+  }
+
+  return false;
+}
+
+/// Returns true if the target region described by \p D can be emitted in SPMD
+/// (single program, multiple data) mode, where all device threads execute the
+/// region, instead of the generic master/worker scheme.
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+                                      const OMPExecutableDirective &D) {
+  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+  switch (DirectiveKind) {
+  case OMPD_target:
+  case OMPD_target_teams:
+    // A bare 'target'/'target teams' still qualifies for SPMD mode when it
+    // contains a suitable nested directive.
+    return hasNestedSPMDDirective(Ctx, D);
+  case OMPD_target_parallel:
+  case OMPD_target_parallel_for:
+  case OMPD_target_parallel_for_simd:
+  case OMPD_target_teams_distribute_parallel_for:
+  case OMPD_target_teams_distribute_parallel_for_simd:
+  case OMPD_target_simd:
+  case OMPD_target_teams_distribute_simd:
+    // Combined constructs with explicit parallelism are SPMD by construction.
+    return true;
+  case OMPD_target_teams_distribute:
+    return false;
+  // All remaining directives are not valid offload entry points; reaching
+  // this function with one of them is a front-end invariant violation.
+  case OMPD_parallel:
+  case OMPD_for:
+  case OMPD_parallel_for:
+  case OMPD_parallel_master:
+  case OMPD_parallel_sections:
+  case OMPD_for_simd:
+  case OMPD_parallel_for_simd:
+  case OMPD_cancel:
+  case OMPD_cancellation_point:
+  case OMPD_ordered:
+  case OMPD_threadprivate:
+  case OMPD_allocate:
+  case OMPD_task:
+  case OMPD_simd:
+  case OMPD_sections:
+  case OMPD_section:
+  case OMPD_single:
+  case OMPD_master:
+  case OMPD_critical:
+  case OMPD_taskyield:
+  case OMPD_barrier:
+  case OMPD_taskwait:
+  case OMPD_taskgroup:
+  case OMPD_atomic:
+  case OMPD_flush:
+  case OMPD_depobj:
+  case OMPD_scan:
+  case OMPD_teams:
+  case OMPD_target_data:
+  case OMPD_target_exit_data:
+  case OMPD_target_enter_data:
+  case OMPD_distribute:
+  case OMPD_distribute_simd:
+  case OMPD_distribute_parallel_for:
+  case OMPD_distribute_parallel_for_simd:
+  case OMPD_teams_distribute:
+  case OMPD_teams_distribute_simd:
+  case OMPD_teams_distribute_parallel_for:
+  case OMPD_teams_distribute_parallel_for_simd:
+  case OMPD_target_update:
+  case OMPD_declare_simd:
+  case OMPD_declare_variant:
+  case OMPD_begin_declare_variant:
+  case OMPD_end_declare_variant:
+  case OMPD_declare_target:
+  case OMPD_end_declare_target:
+  case OMPD_declare_reduction:
+  case OMPD_declare_mapper:
+  case OMPD_taskloop:
+  case OMPD_taskloop_simd:
+  case OMPD_master_taskloop:
+  case OMPD_master_taskloop_simd:
+  case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
+  case OMPD_requires:
+  case OMPD_unknown:
+  default:
+    break;
+  }
+  llvm_unreachable(
+      "Unknown programming model for OpenMP directive on NVPTX target.");
+}
+
+/// Check if the directive is loops based and has schedule clause at all or has
+/// static scheduling.
+static bool hasStaticScheduling(const OMPExecutableDirective &D) {
+  assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
+         isOpenMPLoopDirective(D.getDirectiveKind()) &&
+         "Expected loop-based directive.");
+  // Lightweight runtime requires no 'ordered' clause and either no 'schedule'
+  // clause (the default is static on the device) or an explicit
+  // 'schedule(static)'.
+  return !D.hasClausesOfKind<OMPOrderedClause>() &&
+         (!D.hasClausesOfKind<OMPScheduleClause>() ||
+          llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
+                       [](const OMPScheduleClause *C) {
+                         return C->getScheduleKind() == OMPC_SCHEDULE_static;
+                       }));
+}
+
+/// Check for inner (nested) lightweight runtime construct, if any. Walks the
+/// body of an SPMD-mode directive \p D looking for a nested loop-based
+/// worksharing construct with static scheduling (or a simd construct) that
+/// allows dropping the full device runtime.
+static bool hasNestedLightweightDirective(ASTContext &Ctx,
+                                          const OMPExecutableDirective &D) {
+  assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
+  const auto *CS = D.getInnermostCapturedStmt();
+  const auto *Body =
+      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+
+  if (const auto *NestedDir =
+          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+    switch (D.getDirectiveKind()) {
+    case OMPD_target:
+      // Accept a directly nested 'parallel for'-style construct with static
+      // scheduling, or a simd construct.
+      if (isOpenMPParallelDirective(DKind) &&
+          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+          hasStaticScheduling(*NestedDir))
+        return true;
+      if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
+        return true;
+      if (DKind == OMPD_parallel) {
+        // Look one level deeper for a worksharing loop inside 'parallel'.
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+            /*IgnoreCaptured=*/true);
+        if (!Body)
+          return false;
+        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+        if (const auto *NND =
+                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if (isOpenMPWorksharingDirective(DKind) &&
+              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+            return true;
+        }
+      } else if (DKind == OMPD_teams) {
+        // 'target' + 'teams': descend through 'teams' and, if needed, a
+        // further nested 'parallel', mirroring the cases above.
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+            /*IgnoreCaptured=*/true);
+        if (!Body)
+          return false;
+        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+        if (const auto *NND =
+                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if (isOpenMPParallelDirective(DKind) &&
+              isOpenMPWorksharingDirective(DKind) &&
+              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+            return true;
+          if (DKind == OMPD_parallel) {
+            Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
+                /*IgnoreCaptured=*/true);
+            if (!Body)
+              return false;
+            ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+            if (const auto *NND =
+                    dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+              DKind = NND->getDirectiveKind();
+              if (isOpenMPWorksharingDirective(DKind) &&
+                  isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+                return true;
+            }
+          }
+        }
+      }
+      return false;
+    case OMPD_target_teams:
+      if (isOpenMPParallelDirective(DKind) &&
+          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+          hasStaticScheduling(*NestedDir))
+        return true;
+      if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
+        return true;
+      if (DKind == OMPD_parallel) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+            /*IgnoreCaptured=*/true);
+        if (!Body)
+          return false;
+        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
+        if (const auto *NND =
+                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if (isOpenMPWorksharingDirective(DKind) &&
+              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+            return true;
+        }
+      }
+      return false;
+    case OMPD_target_parallel:
+      if (DKind == OMPD_simd)
+        return true;
+      return isOpenMPWorksharingDirective(DKind) &&
+             isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
+    case OMPD_target_teams_distribute:
+    case OMPD_target_simd:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_parallel:
+    case OMPD_for:
+    case OMPD_parallel_for:
+    case OMPD_parallel_master:
+    case OMPD_parallel_sections:
+    case OMPD_for_simd:
+    case OMPD_parallel_for_simd:
+    case OMPD_cancel:
+    case OMPD_cancellation_point:
+    case OMPD_ordered:
+    case OMPD_threadprivate:
+    case OMPD_allocate:
+    case OMPD_task:
+    case OMPD_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_taskgroup:
+    case OMPD_atomic:
+    case OMPD_flush:
+    case OMPD_depobj:
+    case OMPD_scan:
+    case OMPD_teams:
+    case OMPD_target_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_enter_data:
+    case OMPD_distribute:
+    case OMPD_distribute_simd:
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_update:
+    case OMPD_declare_simd:
+    case OMPD_declare_variant:
+    case OMPD_begin_declare_variant:
+    case OMPD_end_declare_variant:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_declare_reduction:
+    case OMPD_declare_mapper:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_master_taskloop:
+    case OMPD_master_taskloop_simd:
+    case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
+    case OMPD_requires:
+    case OMPD_unknown:
+    default:
+      llvm_unreachable("Unexpected directive.");
+    }
+  }
+
+  return false;
+}
+
+/// Checks if the construct supports lightweight runtime. It must be SPMD
+/// construct + inner loop-based construct with static scheduling.
+static bool supportsLightweightRuntime(ASTContext &Ctx,
+                                       const OMPExecutableDirective &D) {
+  // Only SPMD-mode regions can run without the full device runtime.
+  if (!supportsSPMDExecutionMode(Ctx, D))
+    return false;
+  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+  switch (DirectiveKind) {
+  case OMPD_target:
+  case OMPD_target_teams:
+  case OMPD_target_parallel:
+    // Parallelism is nested; inspect the inner construct.
+    return hasNestedLightweightDirective(Ctx, D);
+  case OMPD_target_parallel_for:
+  case OMPD_target_parallel_for_simd:
+  case OMPD_target_teams_distribute_parallel_for:
+  case OMPD_target_teams_distribute_parallel_for_simd:
+    // (Last|First)-privates must be shared in parallel region.
+    return hasStaticScheduling(D);
+  case OMPD_target_simd:
+  case OMPD_target_teams_distribute_simd:
+    return true;
+  case OMPD_target_teams_distribute:
+    return false;
+  // None of the remaining directives are valid offload entry points.
+  case OMPD_parallel:
+  case OMPD_for:
+  case OMPD_parallel_for:
+  case OMPD_parallel_master:
+  case OMPD_parallel_sections:
+  case OMPD_for_simd:
+  case OMPD_parallel_for_simd:
+  case OMPD_cancel:
+  case OMPD_cancellation_point:
+  case OMPD_ordered:
+  case OMPD_threadprivate:
+  case OMPD_allocate:
+  case OMPD_task:
+  case OMPD_simd:
+  case OMPD_sections:
+  case OMPD_section:
+  case OMPD_single:
+  case OMPD_master:
+  case OMPD_critical:
+  case OMPD_taskyield:
+  case OMPD_barrier:
+  case OMPD_taskwait:
+  case OMPD_taskgroup:
+  case OMPD_atomic:
+  case OMPD_flush:
+  case OMPD_depobj:
+  case OMPD_scan:
+  case OMPD_teams:
+  case OMPD_target_data:
+  case OMPD_target_exit_data:
+  case OMPD_target_enter_data:
+  case OMPD_distribute:
+  case OMPD_distribute_simd:
+  case OMPD_distribute_parallel_for:
+  case OMPD_distribute_parallel_for_simd:
+  case OMPD_teams_distribute:
+  case OMPD_teams_distribute_simd:
+  case OMPD_teams_distribute_parallel_for:
+  case OMPD_teams_distribute_parallel_for_simd:
+  case OMPD_target_update:
+  case OMPD_declare_simd:
+  case OMPD_declare_variant:
+  case OMPD_begin_declare_variant:
+  case OMPD_end_declare_variant:
+  case OMPD_declare_target:
+  case OMPD_end_declare_target:
+  case OMPD_declare_reduction:
+  case OMPD_declare_mapper:
+  case OMPD_taskloop:
+  case OMPD_taskloop_simd:
+  case OMPD_master_taskloop:
+  case OMPD_master_taskloop_simd:
+  case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
+  case OMPD_requires:
+  case OMPD_unknown:
+  default:
+    break;
+  }
+  llvm_unreachable(
+      "Unknown programming model for OpenMP directive on NVPTX target.");
+}
+
+/// Emit a target region kernel in generic (non-SPMD) mode: the master thread
+/// executes the sequential part of the region while the remaining threads run
+/// the worker loop waiting for parallel work.
+void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
+                                             StringRef ParentName,
+                                             llvm::Function *&OutlinedFn,
+                                             llvm::Constant *&OutlinedFnID,
+                                             bool IsOffloadEntry,
+                                             const RegionCodeGenTy &CodeGen) {
+  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
+  EntryFunctionState EST;
+  WorkerFunctionState WST(CGM, D.getBeginLoc());
+  Work.clear();
+  WrapperFunctionsMap.clear();
+
+  // Emit target region as a standalone region.
+  class NVPTXPrePostActionTy : public PrePostActionTy {
+    CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+    CGOpenMPRuntimeGPU::WorkerFunctionState &WST;
+
+  public:
+    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+                         CGOpenMPRuntimeGPU::WorkerFunctionState &WST)
+        : EST(EST), WST(WST) {}
+    void Enter(CodeGenFunction &CGF) override {
+      auto &RT =
+          static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+      RT.emitNonSPMDEntryHeader(CGF, EST, WST);
+      // Skip target region initialization.
+      RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
+    }
+    void Exit(CodeGenFunction &CGF) override {
+      auto &RT =
+          static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+      RT.clearLocThreadIdInsertPt(CGF);
+      RT.emitNonSPMDEntryFooter(CGF, EST);
+    }
+  } Action(EST, WST);
+  CodeGen.setAction(Action);
+  IsInTTDRegion = true;
+  // Reserve place for the globalized memory.
+  GlobalizedRecords.emplace_back();
+  if (!KernelStaticGlobalized) {
+    KernelStaticGlobalized = new llvm::GlobalVariable(
+        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+        llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+        llvm::GlobalValue::NotThreadLocal,
+        CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+  }
+  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+                                   IsOffloadEntry, CodeGen);
+  IsInTTDRegion = false;
+
+  // Now change the name of the worker function to correspond to this target
+  // region's entry function.
+  WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker"));
+
+  // Create the worker function
+  emitWorkerFunction(WST);
+}
+
+// Setup NVPTX threads for master-worker OpenMP scheme.
+void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF,
+                                                  EntryFunctionState &EST,
+                                                  WorkerFunctionState &WST) {
+  CGBuilderTy &Bld = CGF.Builder;
+
+  llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
+  llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+  llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
+  EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  // Threads below the thread limit become workers; the rest go on to the
+  // master check.
+  llvm::Value *IsWorker =
+      Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
+  Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
+
+  // Workers enter the worker loop and jump to the exit on return.
+  CGF.EmitBlock(WorkerBB);
+  emitCall(CGF, WST.Loc, WST.WorkerFn);
+  CGF.EmitBranch(EST.ExitBB);
+
+  // Only the designated master thread proceeds; all others exit.
+  CGF.EmitBlock(MasterCheckBB);
+  llvm::Value *IsMaster =
+      Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+  Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
+
+  CGF.EmitBlock(MasterBB);
+  IsInTargetMasterThreadRegion = true;
+  // SEQUENTIAL (MASTER) REGION START
+  // First action in sequential region:
+  // Initialize the state of the OpenMP runtime library on the GPU.
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {getThreadLimit(CGF),
+                         Bld.getInt16(/*RequiresOMPRuntime=*/1)};
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
+
+  // For data sharing, we need to initialize the stack.
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
+
+  emitGenericVarsProlog(CGF, WST.Loc);
+}
+
+/// Emit the epilogue of a non-SPMD entry function: deinitialize the runtime,
+/// signal the workers to terminate and branch to the common exit block.
+void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
+                                                  EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  emitGenericVarsEpilog(CGF);
+
+  if (!EST.ExitBB)
+    EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
+  CGF.EmitBranch(TerminateBB);
+
+  CGF.EmitBlock(TerminateBB);
+  // Signal termination condition.
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
+  // Barrier to terminate worker threads.
+  syncCTAThreads(CGF);
+  // Master thread jumps to exit point.
+  CGF.EmitBranch(EST.ExitBB);
+
+  CGF.EmitBlock(EST.ExitBB);
+  EST.ExitBB = nullptr;
+}
+
+/// Emit a target region kernel in SPMD mode, where all threads execute the
+/// region body. The full runtime is only requested when forced by
+/// -fopenmp-cuda-force-full-runtime or when the region does not qualify for
+/// the lightweight runtime.
+void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
+                                          StringRef ParentName,
+                                          llvm::Function *&OutlinedFn,
+                                          llvm::Constant *&OutlinedFnID,
+                                          bool IsOffloadEntry,
+                                          const RegionCodeGenTy &CodeGen) {
+  ExecutionRuntimeModesRAII ModeRAII(
+      CurrentExecutionMode, RequiresFullRuntime,
+      CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
+          !supportsLightweightRuntime(CGM.getContext(), D));
+  EntryFunctionState EST;
+
+  // Emit target region as a standalone region.
+  class NVPTXPrePostActionTy : public PrePostActionTy {
+    CGOpenMPRuntimeGPU &RT;
+    CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+    const OMPExecutableDirective &D;
+
+  public:
+    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
+                         CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+                         const OMPExecutableDirective &D)
+        : RT(RT), EST(EST), D(D) {}
+    void Enter(CodeGenFunction &CGF) override {
+      RT.emitSPMDEntryHeader(CGF, EST, D);
+      // Skip target region initialization.
+      RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
+    }
+    void Exit(CodeGenFunction &CGF) override {
+      RT.clearLocThreadIdInsertPt(CGF);
+      RT.emitSPMDEntryFooter(CGF, EST);
+    }
+  } Action(*this, EST, D);
+  CodeGen.setAction(Action);
+  IsInTTDRegion = true;
+  // Reserve place for the globalized memory.
+  GlobalizedRecords.emplace_back();
+  if (!KernelStaticGlobalized) {
+    KernelStaticGlobalized = new llvm::GlobalVariable(
+        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+        llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+        llvm::GlobalValue::NotThreadLocal,
+        CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+  }
+  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+                                   IsOffloadEntry, CodeGen);
+  IsInTTDRegion = false;
+}
+
+/// Emit the prologue of an SPMD entry function: initialize the SPMD kernel
+/// runtime (and, if required, the data-sharing stack) before branching into
+/// the region body.
+void CGOpenMPRuntimeGPU::emitSPMDEntryHeader(
+    CodeGenFunction &CGF, EntryFunctionState &EST,
+    const OMPExecutableDirective &D) {
+  CGBuilderTy &Bld = CGF.Builder;
+
+  // Setup BBs in entry function.
+  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
+  EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
+                         /*RequiresOMPRuntime=*/
+                         Bld.getInt16(RequiresFullRuntime ? 1 : 0),
+                         /*RequiresDataSharing=*/Bld.getInt16(0)};
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
+
+  if (RequiresFullRuntime) {
+    // For data sharing, we need to initialize the stack.
+    CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+        OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+  }
+
+  CGF.EmitBranch(ExecuteBB);
+
+  CGF.EmitBlock(ExecuteBB);
+
+  IsInTargetMasterThreadRegion = true;
+}
+
+/// Emit the epilogue of an SPMD entry function: deinitialize the SPMD kernel
+/// runtime and branch to the common exit block.
+void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF,
+                                               EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  if (!EST.ExitBB)
+    EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
+  CGF.EmitBranch(OMPDeInitBB);
+
+  CGF.EmitBlock(OMPDeInitBB);
+  // DeInitialize the OMP state in the runtime; called by all active threads.
+  llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
+                         CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
+  CGF.EmitBranch(EST.ExitBB);
+
+  CGF.EmitBlock(EST.ExitBB);
+  EST.ExitBB = nullptr;
+}
+
+// Create a unique global variable to indicate the execution mode of this target
+// region. The execution mode is either 'generic', or 'spmd' depending on the
+// target directive. This variable is picked up by the offload library to setup
+// the device appropriately before kernel launch. If the execution mode is
+// 'generic', the runtime reserves one warp for the master, otherwise, all
+// warps participate in parallel work.
+static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
+                                     bool Mode) {
+  // Encoding: 0 == SPMD (Mode true), 1 == generic (Mode false).
+  auto *GVMode =
+      new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+                               llvm::GlobalValue::WeakAnyLinkage,
+                               llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1),
+                               Twine(Name, "_exec_mode"));
+  // Mark as compiler-used so the variable survives to the offload library.
+  CGM.addCompilerUsedGlobal(GVMode);
+}
+
+/// Emit the body of the worker function for a non-SPMD kernel: start the
+/// function and generate the worker wait loop inside it.
+void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) {
+  ASTContext &Ctx = CGM.getContext();
+
+  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {},
+                    WST.Loc, WST.Loc);
+  emitWorkerLoop(CGF, WST);
+  CGF.FinishFunction();
+}
+
+/// Emit the wait loop executed by worker threads of a non-SPMD kernel.
+void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
+                                          WorkerFunctionState &WST) {
+  //
+  // The workers enter this loop and wait for parallel work from the master.
+  // When the master encounters a parallel region it sets up the work + variable
+  // arguments, and wakes up the workers.  The workers first check to see if
+  // they are required for the parallel region, i.e., within the # of requested
+  // parallel threads.  The activated workers load the variable arguments and
+  // execute the parallel work.
+  //
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work");
+  llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers");
+  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel");
+  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel");
+  llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel");
+  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+
+  CGF.EmitBranch(AwaitBB);
+
+  // Workers wait for work from master.
+  CGF.EmitBlock(AwaitBB);
+  // Wait for parallel work
+  syncCTAThreads(CGF);
+
+  // WorkFn receives the outlined parallel function to run (or null to
+  // terminate); ExecStatus tells whether this worker is active for it.
+  Address WorkFn =
+      CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn");
+  Address ExecStatus =
+      CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status");
+  CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
+  CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
+
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {WorkFn.getPointer()};
+  llvm::Value *Ret = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
+  Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
+
+  // On termination condition (workid == 0), exit loop.
+  llvm::Value *WorkID = Bld.CreateLoad(WorkFn);
+  llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID, "should_terminate");
+  Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
+
+  // Activate requested workers.
+  CGF.EmitBlock(SelectWorkersBB);
+  llvm::Value *IsActive =
+      Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active");
+  Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
+
+  // Signal start of parallel region.
+  CGF.EmitBlock(ExecuteBB);
+  // Skip initialization.
+  setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
+
+  // Process work items: outlined parallel functions.
+  // Emit one dispatch test per known outlined function so the call can be
+  // direct when the work item matches.
+  for (llvm::Function *W : Work) {
+    // Try to match this outlined function.
+    llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
+
+    llvm::Value *WorkFnMatch =
+        Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");
+
+    llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn");
+    llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next");
+    Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
+
+    // Execute this outlined function.
+    CGF.EmitBlock(ExecuteFNBB);
+
+    // Insert call to work function via shared wrapper. The shared
+    // wrapper takes two arguments:
+    //   - the parallelism level;
+    //   - the thread ID;
+    emitCall(CGF, WST.Loc, W,
+             {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
+
+    // Go to end of parallel region.
+    CGF.EmitBranch(TerminateBB);
+
+    CGF.EmitBlock(CheckNextBB);
+  }
+  // Default case: call to outlined function through pointer if the target
+  // region makes a declare target call that may contain an orphaned parallel
+  // directive.
+  auto *ParallelFnTy =
+      llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
+                              /*isVarArg=*/false);
+  llvm::Value *WorkFnCast =
+      Bld.CreateBitCast(WorkID, ParallelFnTy->getPointerTo());
+  // Insert call to work function via shared wrapper. The shared
+  // wrapper takes two arguments:
+  //   - the parallelism level;
+  //   - the thread ID;
+  emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast},
+           {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
+  // Go to end of parallel region.
+  CGF.EmitBranch(TerminateBB);
+
+  // Signal end of parallel region.
+  CGF.EmitBlock(TerminateBB);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
+      llvm::None);
+  CGF.EmitBranch(BarrierBB);
+
+  // All active and inactive workers wait at a barrier after parallel region.
+  CGF.EmitBlock(BarrierBB);
+  // Barrier after parallel region.
+  syncCTAThreads(CGF);
+  CGF.EmitBranch(AwaitBB);
+
+  // Exit target region.
+  CGF.EmitBlock(ExitBB);
+  // Skip initialization.
+  clearLocThreadIdInsertPt(CGF);
+}
+
+/// Returns specified OpenMP runtime function for the current OpenMP
+/// implementation.  Specialized for the NVPTX device.
+/// \param Function OpenMP runtime function.
+/// \return Specified function.
+llvm::FunctionCallee
+CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) {
+  llvm::FunctionCallee RTLFn = nullptr;
+  switch (static_cast(Function)) {
+  case OMPRTL_NVPTX__kmpc_kernel_init: {
+    // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
+    // RequiresOMPRuntime);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_deinit: {
+    // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
+    // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+    // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+    // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
+    /// Build void __kmpc_kernel_prepare_parallel(
+    /// void *outlined_function);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
+    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
+    llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
+    auto *FnTy =
+        llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
+    /// Build void __kmpc_kernel_end_parallel();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_serialized_parallel: {
+    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
+    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_shuffle_int32: {
+    // Build int32_t __kmpc_shuffle_int32(int32_t element,
+    // int16_t lane_offset, int16_t warp_size);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_shuffle_int64: {
+    // Build int64_t __kmpc_shuffle_int64(int64_t element,
+    // int16_t lane_offset, int16_t warp_size);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
+    // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
+    // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
+    // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
+    // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
+    // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+                                             CGM.Int16Ty, CGM.Int16Ty};
+    auto *ShuffleReduceFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+    auto *InterWarpCopyFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+                                CGM.Int32Ty,
+                                CGM.Int32Ty,
+                                CGM.SizeTy,
+                                CGM.VoidPtrTy,
+                                ShuffleReduceFnTy->getPointerTo(),
+                                InterWarpCopyFnTy->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+    // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
+    // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+    // global_tid, void *global_buffer, int32_t num_of_records, void*
+    // reduce_data,
+    // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+    // lane_offset, int16_t shortCircuit),
+    // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+    // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+    // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+    // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+    // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+    // *buffer, int idx, void *reduce_data));
+    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+                                             CGM.Int16Ty, CGM.Int16Ty};
+    auto *ShuffleReduceFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+    auto *InterWarpCopyFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy,
+                                          CGM.VoidPtrTy};
+    auto *GlobalListFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+                                CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                ShuffleReduceFnTy->getPointerTo(),
+                                InterWarpCopyFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
+    /// Build void __kmpc_data_sharing_init_stack();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
+    /// Build void __kmpc_data_sharing_init_stack_spmd();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
+    // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
+    // int16_t UseSharedMemory);
+    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
+    // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t
+    // UseSharedMemory);
+    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
+    // Build void __kmpc_data_sharing_pop_stack(void *a);
+    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy,
+                                      /*Name=*/"__kmpc_data_sharing_pop_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
+    /// Build void __kmpc_begin_sharing_variables(void ***args,
+    /// size_t n_args);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
+    /// Build void __kmpc_end_sharing_variables();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_get_shared_variables: {
+    /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_parallel_level: {
+    // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
+    // Build int8_t __kmpc_is_spmd_exec_mode();
+    auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
+    // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+    // const void *buf, size_t size, int16_t is_shared, const void **res);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
+                                CGM.Int16Ty, CGM.VoidPtrPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
+    // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
+    // int16_t is_shared);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
+    break;
+  }
+  case OMPRTL__kmpc_barrier: {
+    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn =
+        CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+    break;
+  }
+  case OMPRTL__kmpc_barrier_simple_spmd: {
+    // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateConvergentRuntimeFunction(
+        FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+    // Build int32_t __kmpc_warp_active_thread_mask(void);
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_syncwarp: {
+    // Build void __kmpc_syncwarp(kmp_int32 Mask);
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp");
+    break;
+  }
+  }
+  return RTLFn;
+}
+
+void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
+                                              llvm::Constant *Addr,
+                                              uint64_t Size, int32_t,
+                                              llvm::GlobalValue::LinkageTypes) {
+  // TODO: Add support for global variables on the device after declare target
+  // support.
+  // Only function entries (kernels) are annotated here; everything else is
+  // skipped until device global variables are supported.
+  if (!isa<llvm::Function>(Addr))
+    return;
+  llvm::Module &M = CGM.getModule();
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+
+  // Get "nvvm.annotations" metadata node (created on first use).
+  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+
+  // Mark the entry point as a kernel: {fn, !"kernel", i32 1}.
+  llvm::Metadata *MDVals[] = {
+      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
+      llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+  // Append metadata to nvvm.annotations
+  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
+void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
+    const OMPExecutableDirective &D, StringRef ParentName,
+    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+  // Only offload entry points need a device kernel emitted here.
+  if (!IsOffloadEntry)
+    return;
+
+  assert(!ParentName.empty() && "Invalid target region parent name!");
+
+  // Choose the kernel flavor from the directive's SPMD capability, then
+  // record the chosen execution mode for the generated function.
+  const bool IsSPMDMode = supportsSPMDExecutionMode(CGM.getContext(), D);
+  if (IsSPMDMode)
+    emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+                   CodeGen);
+  else
+    emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+                      CodeGen);
+
+  setPropertyExecutionMode(CGM, OutlinedFn->getName(), IsSPMDMode);
+}
+
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Enum for accessing the reserved_2 field of the ident_t struct.
+/// These bits communicate the kernel's execution mode to the device runtime.
+enum ModeFlagsTy : unsigned {
+  /// Bit set to 1 when in SPMD mode.
+  KMP_IDENT_SPMD_MODE = 0x01,
+  /// Bit set to 1 when a simplified runtime is used.
+  KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
+};
+
+/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime.
+static const ModeFlagsTy UndefinedMode =
+    (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
+} // anonymous namespace
+
+unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
+  // Encode the current execution mode into the ident_t reserved_2 bits.
+  switch (getExecutionMode()) {
+  case EM_SPMD:
+    // SPMD kernels carry the SIMPLE_RT bit only when the full runtime is not
+    // required.
+    return requiresFullRuntime()
+               ? (KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE))
+               : (KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE);
+  case EM_NonSPMD:
+    assert(requiresFullRuntime() && "Expected full runtime.");
+    return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
+  case EM_Unknown:
+    return UndefinedMode;
+  }
+  llvm_unreachable("Unknown flags are requested.");
+}
+
+// Construct the GPU OpenMP runtime. This runtime is only valid for
+// device-side compilation; host compilations must never instantiate it.
+CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
+    : CGOpenMPRuntime(CGM, "_", "$") {
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    llvm_unreachable("OpenMP NVPTX can only handle device code.");
+}
+
+void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
+                                              ProcBindKind ProcBind,
+                                              SourceLocation Loc) {
+  // Do nothing in case of SPMD mode and L0 parallel; otherwise defer to the
+  // common lowering.
+  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+    CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+}
+
+void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF,
+                                                llvm::Value *NumThreads,
+                                                SourceLocation Loc) {
+  // Do nothing in case of SPMD mode and L0 parallel; otherwise defer to the
+  // common lowering.
+  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+    CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
+}
+
+// Intentionally a no-op for the GPU runtime: the num_teams/thread_limit
+// clauses are not lowered to a runtime call here.
+void CGOpenMPRuntimeGPU::emitNumTeamsClause(CodeGenFunction &CGF,
+                                              const Expr *NumTeams,
+                                              const Expr *ThreadLimit,
+                                              SourceLocation Loc) {}
+
+llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  // Emit target region as a standalone region.
+  // RAII-style action: mark codegen of the region body as "inside a parallel
+  // region" and restore the previous flag on exit.
+  class NVPTXPrePostActionTy : public PrePostActionTy {
+    bool &IsInParallelRegion;
+    bool PrevIsInParallelRegion;
+
+  public:
+    NVPTXPrePostActionTy(bool &IsInParallelRegion)
+        : IsInParallelRegion(IsInParallelRegion) {}
+    void Enter(CodeGenFunction &CGF) override {
+      PrevIsInParallelRegion = IsInParallelRegion;
+      IsInParallelRegion = true;
+    }
+    void Exit(CodeGenFunction &CGF) override {
+      IsInParallelRegion = PrevIsInParallelRegion;
+    }
+  } Action(IsInParallelRegion);
+  CodeGen.setAction(Action);
+  // The outlined parallel function is emitted outside of the TTD and
+  // target-master-thread contexts; save and restore those flags around it.
+  bool PrevIsInTTDRegion = IsInTTDRegion;
+  IsInTTDRegion = false;
+  bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
+  IsInTargetMasterThreadRegion = false;
+  auto *OutlinedFun =
+      cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
+          D, ThreadIDVar, InnermostKind, CodeGen));
+  // With optimization enabled, force inlining of the outlined function.
+  if (CGM.getLangOpts().Optimize) {
+    OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+    OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+    OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+  IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
+  IsInTTDRegion = PrevIsInTTDRegion;
+  // Outside SPMD mode, a top-level (non-nested) parallel region also gets a
+  // data-sharing wrapper (see createParallelDataSharingWrapper).
+  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
+      !IsInParallelRegion) {
+    llvm::Function *WrapperFun =
+        createParallelDataSharingWrapper(OutlinedFun, D);
+    WrapperFunctionsMap[OutlinedFun] = WrapperFun;
+  }
+
+  return OutlinedFun;
+}
+
+/// Get list of lastprivate variables from the teams distribute ... or
+/// teams {distribute ...} directives.
+/// \param Ctx AST context used to walk the captured statement.
+/// \param D A teams-class directive (asserted below).
+/// \param Vars [out] Receives the decls of all lastprivate list items.
+static void
+getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+                             llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+         "expected teams directive.");
+  const OMPExecutableDirective *Dir = &D;
+  // If D is not itself a distribute directive, look for a single nested
+  // distribute directive inside the captured region; bail out otherwise.
+  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
+    if (const Stmt *S = CGOpenMPRuntime::getSingleCompoundChild(
+            Ctx,
+            D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+                /*IgnoreCaptured=*/true))) {
+      Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
+      if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))
+        Dir = nullptr;
+    }
+  }
+  if (!Dir)
+    return;
+  // Collect the decls referenced by every lastprivate clause.
+  for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
+    for (const Expr *E : C->getVarRefs())
+      Vars.push_back(getPrivateItem(E));
+  }
+}
+
+/// Get list of reduction variables from the teams ... directives.
+/// \param Ctx AST context (unused here but kept for signature parity with
+/// getDistributeLastprivateVars).
+/// \param D A teams-class directive (asserted below).
+/// \param Vars [out] Receives the decls of the private copies of all
+/// reduction list items.
+static void
+getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+                      llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+         "expected teams directive.");
+  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+    for (const Expr *E : C->privates())
+      Vars.push_back(getPrivateItem(E));
+  }
+}
+
+llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  SourceLocation Loc = D.getBeginLoc();
+
+  const RecordDecl *GlobalizedRD = nullptr;
+  llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
+  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+  // Globalize team reductions variable unconditionally in all modes.
+  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+    getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
+  if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) {
+    // SPMD mode: globalize distribute lastprivates immediately by building a
+    // record for them.
+    getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
+    if (!LastPrivatesReductions.empty()) {
+      GlobalizedRD = ::buildRecordForGlobalizedVars(
+          CGM.getContext(), llvm::None, LastPrivatesReductions,
+          MappedDeclsFields, WarpSize);
+    }
+  } else if (!LastPrivatesReductions.empty()) {
+    // Non-SPMD mode: stash the teams captured decl and its reduction vars
+    // for later processing.
+    assert(!TeamAndReductions.first &&
+           "Previous team declaration is not expected.");
+    TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
+    std::swap(TeamAndReductions.second, LastPrivatesReductions);
+  }
+
+  // Emit target region as a standalone region.
+  // RAII-style action emitting the globalization prolog/epilog around the
+  // teams region body.
+  class NVPTXPrePostActionTy : public PrePostActionTy {
+    SourceLocation &Loc;
+    const RecordDecl *GlobalizedRD;
+    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &MappedDeclsFields;
+
+  public:
+    NVPTXPrePostActionTy(
+        SourceLocation &Loc, const RecordDecl *GlobalizedRD,
+        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+            &MappedDeclsFields)
+        : Loc(Loc), GlobalizedRD(GlobalizedRD),
+          MappedDeclsFields(MappedDeclsFields) {}
+    void Enter(CodeGenFunction &CGF) override {
+      auto &Rt =
+          static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+      if (GlobalizedRD) {
+        // Register the globalized record and map each decl to its field so
+        // the prolog can materialize per-team storage.
+        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+        I->getSecond().GlobalRecord = GlobalizedRD;
+        I->getSecond().MappedParams =
+            std::make_unique<CodeGenFunction::OMPMapVars>();
+        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+        for (const auto &Pair : MappedDeclsFields) {
+          assert(Pair.getFirst()->isCanonicalDecl() &&
+                 "Expected canonical declaration");
+          Data.insert(std::make_pair(Pair.getFirst(),
+                                     MappedVarData(Pair.getSecond(),
+                                                   /*IsOnePerTeam=*/true)));
+        }
+      }
+      Rt.emitGenericVarsProlog(CGF, Loc);
+    }
+    void Exit(CodeGenFunction &CGF) override {
+      static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericVarsEpilog(CGF);
+    }
+  } Action(Loc, GlobalizedRD, MappedDeclsFields);
+  CodeGen.setAction(Action);
+  llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
+      D, ThreadIDVar, InnermostKind, CodeGen);
+  // With optimization enabled, force inlining of the outlined function.
+  if (CGM.getLangOpts().Optimize) {
+    OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+    OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+    OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
+  return OutlinedFun;
+}
+
+void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
+                                                 SourceLocation Loc,
+                                                 bool WithSPMDCheck) {
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
+      getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+    return;
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+  if (I == FunctionGlobalizedDecls.end())
+    return;
+  if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
+    QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+    QualType SecGlobalRecTy;
+
+    // Recover pointer to this function's global record. The runtime will
+    // handle the specifics of the allocation of the memory.
+    // Use actual memory size of the record including the padding
+    // for alignment purposes.
+    unsigned Alignment =
+        CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
+    unsigned GlobalRecordSize =
+        CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity();
+    GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+
+    llvm::PointerType *GlobalRecPtrTy =
+        CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
+    llvm::Value *GlobalRecCastAddr;
+    llvm::Value *IsTTD = nullptr;
+    if (!IsInTTDRegion &&
+        (WithSPMDCheck ||
+         getExecutionMode() == CGOpenMPRuntimeGPU::EM_Unknown)) {
+      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+      llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
+      llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+      if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
+        llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+        llvm::Value *ThreadID = getThreadID(CGF, Loc);
+        llvm::Value *PL = CGF.EmitRuntimeCall(
+            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+            {RTLoc, ThreadID});
+        IsTTD = Bld.CreateIsNull(PL);
+      }
+      llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+      Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(SPMDBB);
+      Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
+                               CharUnits::fromQuantity(Alignment));
+      CGF.EmitBranch(ExitBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(NonSPMDBB);
+      llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
+      if (const RecordDecl *SecGlobalizedVarsRecord =
+              I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
+        SecGlobalRecTy =
+            CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
+
+        // Recover pointer to this function's global record. The runtime will
+        // handle the specifics of the allocation of the memory.
+        // Use actual memory size of the record including the padding
+        // for alignment purposes.
+        unsigned Alignment =
+            CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
+        unsigned GlobalRecordSize =
+            CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
+        GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+        Size = Bld.CreateSelect(
+            IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
+      }
+      // TODO: allow the usage of shared memory to be controlled by
+      // the user, for now, default to global.
+      llvm::Value *GlobalRecordSizeArg[] = {
+          Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+      llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(
+              OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+          GlobalRecordSizeArg);
+      GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+          GlobalRecValue, GlobalRecPtrTy);
+      CGF.EmitBlock(ExitBB);
+      auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
+                                /*NumReservedValues=*/2, "_select_stack");
+      Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
+      Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
+      GlobalRecCastAddr = Phi;
+      I->getSecond().GlobalRecordAddr = Phi;
+      I->getSecond().IsInSPMDModeFlag = IsSPMD;
+    } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
+      assert(GlobalizedRecords.back().Records.size() < 2 &&
+             "Expected less than 2 globalized records: one for target and one "
+             "for teams.");
+      unsigned Offset = 0;
+      for (const RecordDecl *RD : GlobalizedRecords.back().Records) {
+        QualType RDTy = CGM.getContext().getRecordType(RD);
+        unsigned Alignment =
+            CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
+        unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
+        Offset =
+            llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
+      }
+      unsigned Alignment =
+          CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
+      Offset = llvm::alignTo(Offset, Alignment);
+      GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
+      ++GlobalizedRecords.back().RegionCounter;
+      if (GlobalizedRecords.back().Records.size() == 1) {
+        assert(KernelStaticGlobalized &&
+               "Kernel static pointer must be initialized already.");
+        auto *UseSharedMemory = new llvm::GlobalVariable(
+            CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+            llvm::GlobalValue::InternalLinkage, nullptr,
+            "_openmp_static_kernel$is_shared");
+        UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+        QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+            /*DestWidth=*/16, /*Signed=*/0);
+        llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+            Address(UseSharedMemory,
+                    CGM.getContext().getTypeAlignInChars(Int16Ty)),
+            /*Volatile=*/false, Int16Ty, Loc);
+        auto *StaticGlobalized = new llvm::GlobalVariable(
+            CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
+            llvm::GlobalValue::CommonLinkage, nullptr);
+        auto *RecSize = new llvm::GlobalVariable(
+            CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
+            llvm::GlobalValue::InternalLinkage, nullptr,
+            "_openmp_static_kernel$size");
+        RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+        llvm::Value *Ld = CGF.EmitLoadOfScalar(
+            Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
+            CGM.getContext().getSizeType(), Loc);
+        llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+            KernelStaticGlobalized, CGM.VoidPtrPtrTy);
+        llvm::Value *GlobalRecordSizeArg[] = {
+            llvm::ConstantInt::get(
+                CGM.Int16Ty,
+                getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
+            StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
+        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+                                OMPRTL_NVPTX__kmpc_get_team_static_memory),
+                            GlobalRecordSizeArg);
+        GlobalizedRecords.back().Buffer = StaticGlobalized;
+        GlobalizedRecords.back().RecSize = RecSize;
+        GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
+        GlobalizedRecords.back().Loc = Loc;
+      }
+      assert(KernelStaticGlobalized && "Global address must be set already.");
+      Address FrameAddr = CGF.EmitLoadOfPointer(
+          Address(KernelStaticGlobalized, CGM.getPointerAlign()),
+          CGM.getContext()
+              .getPointerType(CGM.getContext().VoidPtrTy)
+              .castAs());
+      llvm::Value *GlobalRecValue =
+          Bld.CreateConstInBoundsGEP(FrameAddr, Offset).getPointer();
+      I->getSecond().GlobalRecordAddr = GlobalRecValue;
+      I->getSecond().IsInSPMDModeFlag = nullptr;
+      GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+          GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo());
+    } else {
+      // TODO: allow the usage of shared memory to be controlled by
+      // the user, for now, default to global.
+      bool UseSharedMemory =
+          IsInTTDRegion && GlobalRecordSize <= SharedMemorySize;
+      llvm::Value *GlobalRecordSizeArg[] = {
+          llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+          CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
+      llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(
+              IsInTTDRegion
+                  ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
+                  : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+          GlobalRecordSizeArg);
+      GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+          GlobalRecValue, GlobalRecPtrTy);
+      I->getSecond().GlobalRecordAddr = GlobalRecValue;
+      I->getSecond().IsInSPMDModeFlag = nullptr;
+    }
+    LValue Base =
+        CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy);
+
+    // Emit the "global alloca" which is a GEP from the global declaration
+    // record using the pointer returned by the runtime.
+    LValue SecBase;
+    decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
+    if (IsTTD) {
+      SecIt = I->getSecond().SecondaryLocalVarData->begin();
+      llvm::PointerType *SecGlobalRecPtrTy =
+          CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
+      SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
+          Bld.CreatePointerBitCastOrAddrSpaceCast(
+              I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
+          SecGlobalRecTy);
+    }
+    for (auto &Rec : I->getSecond().LocalVarData) {
+      bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
+      llvm::Value *ParValue;
+      if (EscapedParam) {
+        const auto *VD = cast(Rec.first);
+        LValue ParLVal =
+            CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
+        ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
+      }
+      LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD);
+      // Emit VarAddr basing on lane-id if required.
+      QualType VarTy;
+      if (Rec.second.IsOnePerTeam) {
+        VarTy = Rec.second.FD->getType();
+      } else {
+        llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
+            VarAddr.getAddress(CGF).getPointer(),
+            {Bld.getInt32(0), getNVPTXLaneID(CGF)});
+        VarTy =
+            Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
+        VarAddr = CGF.MakeAddrLValue(
+            Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
+            AlignmentSource::Decl);
+      }
+      Rec.second.PrivateAddr = VarAddr.getAddress(CGF);
+      if (!IsInTTDRegion &&
+          (WithSPMDCheck ||
+           getExecutionMode() == CGOpenMPRuntimeGPU::EM_Unknown)) {
+        assert(I->getSecond().IsInSPMDModeFlag &&
+               "Expected unknown execution mode or required SPMD check.");
+        if (IsTTD) {
+          assert(SecIt->second.IsOnePerTeam &&
+                 "Secondary glob data must be one per team.");
+          LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
+          VarAddr.setAddress(
+              Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(CGF),
+                                       VarAddr.getPointer(CGF)),
+                      VarAddr.getAlignment()));
+          Rec.second.PrivateAddr = VarAddr.getAddress(CGF);
+        }
+        Address GlobalPtr = Rec.second.PrivateAddr;
+        Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
+        Rec.second.PrivateAddr = Address(
+            Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
+                             LocalAddr.getPointer(), GlobalPtr.getPointer()),
+            LocalAddr.getAlignment());
+      }
+      if (EscapedParam) {
+        const auto *VD = cast(Rec.first);
+        CGF.EmitStoreOfScalar(ParValue, VarAddr);
+        I->getSecond().MappedParams->setVarAddr(CGF, VD,
+                                                VarAddr.getAddress(CGF));
+      }
+      if (IsTTD)
+        ++SecIt;
+    }
+  }
+  for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
+    // Recover pointer to this function's global record. The runtime will
+    // handle the specifics of the allocation of the memory.
+    // Use actual memory size of the record including the padding
+    // for alignment purposes.
+    CGBuilderTy &Bld = CGF.Builder;
+    llvm::Value *Size = CGF.getTypeSize(VD->getType());
+    CharUnits Align = CGM.getContext().getDeclAlign(VD);
+    Size = Bld.CreateNUWAdd(
+        Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));
+    llvm::Value *AlignVal =
+        llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity());
+    Size = Bld.CreateUDiv(Size, AlignVal);
+    Size = Bld.CreateNUWMul(Size, AlignVal);
+    // TODO: allow the usage of shared memory to be controlled by
+    // the user, for now, default to global.
+    llvm::Value *GlobalRecordSizeArg[] = {
+        Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+    llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+        createNVPTXRuntimeFunction(
+            OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+        GlobalRecordSizeArg);
+    llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+        GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
+    LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(),
+                                     CGM.getContext().getDeclAlign(VD),
+                                     AlignmentSource::Decl);
+    I->getSecond().MappedParams->setVarAddr(CGF, cast(VD),
+                                            Base.getAddress(CGF));
+    I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
+  }
+  I->getSecond().MappedParams->apply(CGF);
+}
+
+// Emit the epilogue matching emitGenericVarsProlog: restore the remapped
+// parameter addresses and return the memory that the prologue obtained from
+// the data-sharing runtime for this function.
+void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
+                                                 bool WithSPMDCheck) {
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
+      getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+    return;
+
+  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+  if (I != FunctionGlobalizedDecls.end()) {
+    // Restore the original addresses of the globalized parameters.
+    I->getSecond().MappedParams->restore(CGF);
+    if (!CGF.HaveInsertPoint())
+      return;
+    // Pop the variable-length allocations in reverse order of their pushes so
+    // the runtime's stack unwinds correctly.
+    for (llvm::Value *Addr :
+         llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
+      CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+          Addr);
+    }
+    if (I->getSecond().GlobalRecordAddr) {
+      if (!IsInTTDRegion &&
+          (WithSPMDCheck ||
+           getExecutionMode() == CGOpenMPRuntimeGPU::EM_Unknown)) {
+        // Execution mode is not known statically: branch on the flag computed
+        // in the prologue and pop the stack only on the non-SPMD path.
+        CGBuilderTy &Bld = CGF.Builder;
+        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+        llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+        Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
+        // There is no need to emit line number for unconditional branch.
+        (void)ApplyDebugLocation::CreateEmpty(CGF);
+        CGF.EmitBlock(NonSPMDBB);
+        CGF.EmitRuntimeCall(
+            createNVPTXRuntimeFunction(
+                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+            CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
+        CGF.EmitBlock(ExitBB);
+      } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
+        assert(GlobalizedRecords.back().RegionCounter > 0 &&
+               "region counter must be > 0.");
+        --GlobalizedRecords.back().RegionCounter;
+        // Emit the restore function only in the target region.
+        if (GlobalizedRecords.back().RegionCounter == 0) {
+          // Reload the UseSharedMemory flag so the runtime knows which kind of
+          // memory (shared vs. global) it handed out for the team record.
+          QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+              /*DestWidth=*/16, /*Signed=*/0);
+          llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+              Address(GlobalizedRecords.back().UseSharedMemory,
+                      CGM.getContext().getTypeAlignInChars(Int16Ty)),
+              /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
+          llvm::Value *Args[] = {
+              llvm::ConstantInt::get(
+                  CGM.Int16Ty,
+                  getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
+              IsInSharedMemory};
+          CGF.EmitRuntimeCall(
+              createNVPTXRuntimeFunction(
+                  OMPRTL_NVPTX__kmpc_restore_team_static_memory),
+              Args);
+        }
+      } else {
+        // Mode known to be non-SPMD: unconditionally pop the record.
+        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+                                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+                            I->getSecond().GlobalRecordAddr);
+      }
+    }
+  }
+}
+
+// Emit the call to the teams outlined function. The argument list follows the
+// OpenMP outlined-function convention: thread id address, a zeroed i32
+// temporary, then the captured variables.
+void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
+                                         const OMPExecutableDirective &D,
+                                         SourceLocation Loc,
+                                         llvm::Function *OutlinedFn,
+                                         ArrayRef CapturedVars) {
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  // The second outlined-function argument is always a zero-initialized i32.
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  llvm::SmallVector OutlinedFnArgs;
+  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+}
+
+// Emit a parallel region call, dispatching on the execution scheme chosen for
+// the enclosing target region (SPMD vs. generic/non-SPMD).
+void CGOpenMPRuntimeGPU::emitParallelCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+    ArrayRef CapturedVars, const Expr *IfCond) {
+  // Nothing to emit into a terminated block.
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  const bool IsSPMDMode = getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD;
+  if (!IsSPMDMode)
+    emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+  else
+    emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
+}
+
+// Emit a parallel region under the generic (non-SPMD) execution scheme.
+// Depending on the runtime context the region is either executed serialized
+// by the encountering thread, or its wrapper function is handed off to the
+// worker threads via the kernel_prepare_parallel / variable-sharing protocol.
+void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+    ArrayRef CapturedVars, const Expr *IfCond) {
+  llvm::Function *Fn = cast(OutlinedFn);
+
+  // Force inline this outlined function at its call site.
+  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  // ThreadId for serialized parallels is 0.
+  Address ThreadIDAddr = ZeroAddr;
+  // Direct call to the outlined function with the standard
+  // (tid, bound_tid, captures...) argument list.
+  auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr](
+                       CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+
+    Address ZeroAddr =
+        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                         /*Name=*/".bound.zero.addr");
+    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+    llvm::SmallVector OutlinedFnArgs;
+    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+    emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
+  };
+  // Serialized execution: wrap the direct call in
+  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
+  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+                                        PrePostActionTy &) {
+
+    RegionCodeGenTy RCG(CodeGen);
+    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *ThreadID = getThreadID(CGF, Loc);
+    llvm::Value *Args[] = {RTLoc, ThreadID};
+
+    NVPTXActionTy Action(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        Args,
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        Args);
+    RCG.setAction(Action);
+    RCG(CGF);
+  };
+
+  // First-level parallelism from the target master thread: announce the
+  // wrapper function to the runtime, publish the captured variables, then
+  // release the workers to execute it.
+  auto &&L0ParallelGen = [this, CapturedVars, Fn](CodeGenFunction &CGF,
+                                                  PrePostActionTy &Action) {
+    CGBuilderTy &Bld = CGF.Builder;
+    llvm::Function *WFn = WrapperFunctionsMap[Fn];
+    assert(WFn && "Wrapper function does not exist!");
+    llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
+
+    // Prepare for parallel region. Indicate the outlined function.
+    llvm::Value *Args[] = {ID};
+    CGF.EmitRuntimeCall(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+        Args);
+
+    // Create a private scope that will globalize the arguments
+    // passed from the outside of the target region.
+    CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
+
+    // There's something to share.
+    if (!CapturedVars.empty()) {
+      // Prepare for parallel region. Indicate the outlined function.
+      Address SharedArgs =
+          CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_arg_refs");
+      llvm::Value *SharedArgsPtr = SharedArgs.getPointer();
+
+      llvm::Value *DataSharingArgs[] = {
+          SharedArgsPtr,
+          llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
+      CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+                              OMPRTL_NVPTX__kmpc_begin_sharing_variables),
+                          DataSharingArgs);
+
+      // Store variable address in a list of references to pass to workers.
+      unsigned Idx = 0;
+      ASTContext &Ctx = CGF.getContext();
+      Address SharedArgListAddress = CGF.EmitLoadOfPointer(
+          SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy))
+                          .castAs());
+      for (llvm::Value *V : CapturedVars) {
+        Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
+        llvm::Value *PtrV;
+        // Integer captures are passed by value, smuggled through the void*
+        // slot; everything else is already an address.
+        if (V->getType()->isIntegerTy())
+          PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
+        else
+          PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
+        CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
+                              Ctx.getPointerType(Ctx.VoidPtrTy));
+        ++Idx;
+      }
+    }
+
+    // Activate workers. This barrier is used by the master to signal
+    // work for the workers.
+    syncCTAThreads(CGF);
+
+    // OpenMP [2.5, Parallel Construct, p.49]
+    // There is an implied barrier at the end of a parallel region. After the
+    // end of a parallel region, only the master thread of the team resumes
+    // execution of the enclosing task region.
+    //
+    // The master waits at this barrier until all workers are done.
+    syncCTAThreads(CGF);
+
+    if (!CapturedVars.empty())
+      CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
+
+    // Remember for post-processing in worker loop.
+    Work.emplace_back(WFn);
+  };
+
+  // When the context is not known statically, choose between serialized
+  // execution and the worker hand-off at run time.
+  auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen](
+                             CodeGenFunction &CGF, PrePostActionTy &Action) {
+    if (IsInParallelRegion) {
+      SeqGen(CGF, Action);
+    } else if (IsInTargetMasterThreadRegion) {
+      L0ParallelGen(CGF, Action);
+    } else {
+      // Check for master and then parallelism:
+      // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) {
+      //   Serialized execution.
+      // } else {
+      //   Worker call.
+      // }
+      CGBuilderTy &Bld = CGF.Builder;
+      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+      llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
+      llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
+      llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
+      llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+      Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(ParallelCheckBB);
+      llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+      llvm::Value *ThreadID = getThreadID(CGF, Loc);
+      llvm::Value *PL = CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+          {RTLoc, ThreadID});
+      llvm::Value *Res = Bld.CreateIsNotNull(PL);
+      Bld.CreateCondBr(Res, SeqBB, MasterBB);
+      CGF.EmitBlock(SeqBB);
+      SeqGen(CGF, Action);
+      CGF.EmitBranch(ExitBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(MasterBB);
+      L0ParallelGen(CGF, Action);
+      CGF.EmitBranch(ExitBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      // Emit the continuation block for code after the if.
+      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+    }
+  };
+
+  // The if clause selects at run time between the parallel and the serialized
+  // version of the region.
+  if (IfCond) {
+    emitIfClause(CGF, IfCond, LNParallelGen, SeqGen);
+  } else {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    RegionCodeGenTy ThenRCG(LNParallelGen);
+    ThenRCG(CGF);
+  }
+}
+
+// Emit a parallel region call in SPMD mode: in the target master thread
+// region call the outlined function directly; any deeper nesting is L2+
+// parallelism and is executed serialized.
+void CGOpenMPRuntimeGPU::emitSPMDParallelCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+    ArrayRef CapturedVars, const Expr *IfCond) {
+  // Just call the outlined function to execute the parallel region.
+  // OutlinedFn(&gtid, &zero, CapturedStruct);
+  //
+  llvm::SmallVector OutlinedFnArgs;
+
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  // ThreadId for serialized parallels is 0. Captured by reference below so it
+  // can be redirected to the real thread id before CodeGen is emitted.
+  Address ThreadIDAddr = ZeroAddr;
+  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr](
+                       CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+
+    Address ZeroAddr =
+        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                         /*Name=*/".bound.zero.addr");
+    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+    llvm::SmallVector OutlinedFnArgs;
+    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+    emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  };
+  // Serialized execution: wrap the direct call in
+  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
+  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+                                        PrePostActionTy &) {
+
+    RegionCodeGenTy RCG(CodeGen);
+    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *ThreadID = getThreadID(CGF, Loc);
+    llvm::Value *Args[] = {RTLoc, ThreadID};
+
+    NVPTXActionTy Action(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        Args,
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        Args);
+    RCG.setAction(Action);
+    RCG(CGF);
+  };
+
+  if (IsInTargetMasterThreadRegion) {
+    // In the target master thread region the real thread id must be used.
+    ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+    RegionCodeGenTy RCG(CodeGen);
+    RCG(CGF);
+  } else {
+    // If we are not in the target region, it is definitely L2 parallelism or
+    // more, because for SPMD mode we always has L1 parallel level, so we don't
+    // need to check for orphaned directives.
+    RegionCodeGenTy RCG(SeqGen);
+    RCG(CGF);
+  }
+}
+
+// Synchronize all threads in the CTA (thread block) with an unconditional
+// simple SPMD barrier.
+void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {
+  // Always emit simple barriers!
+  if (!CGF.HaveInsertPoint())
+    return;
+  // Build call __kmpc_barrier_simple_spmd(nullptr, 0);
+  // This function does not use parameters, so we can emit just default values.
+  llvm::Value *Args[] = {
+      llvm::ConstantPointerNull::get(
+          cast(getIdentTyPointerTy())),
+      llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
+  llvm::CallInst *Call = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+  // Mark the call convergent so the optimizer does not move the barrier
+  // across divergent control flow.
+  Call->setConvergent();
+}
+
+// Emit an OpenMP barrier. The EmitChecks/ForceSimpleCall flags accepted by the
+// base-class interface are intentionally ignored on the device.
+void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
+                                           SourceLocation Loc,
+                                           OpenMPDirectiveKind Kind, bool,
+                                           bool) {
+  // Always emit simple barriers!
+  if (!CGF.HaveInsertPoint())
+    return;
+  // Build call __kmpc_barrier(loc, thread_id);
+  unsigned Flags = getDefaultFlagsForBarriers(Kind);
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
+                         getThreadID(CGF, Loc)};
+  llvm::CallInst *Call = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+  // Barriers must stay convergent for correctness on the GPU.
+  Call->setConvergent();
+}
+
+// Emit a critical region for the GPU: the threads of the team take turns by
+// looping a counter over [0, TeamWidth) and entering the body only when the
+// counter equals their team-local id, so exactly one thread executes the body
+// at a time.
+void CGOpenMPRuntimeGPU::emitCriticalRegion(
+    CodeGenFunction &CGF, StringRef CriticalName,
+    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
+    const Expr *Hint) {
+  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");
+  llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");
+  llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");
+  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
+  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
+
+  // Get the mask of active threads in the warp.
+  llvm::Value *Mask = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
+  // Fetch team-local id of the thread.
+  llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+
+  // Get the width of the team.
+  llvm::Value *TeamWidth = getNVPTXNumThreads(CGF);
+
+  // Initialize the counter variable for the loop.
+  QualType Int32Ty =
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0);
+  Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter");
+  LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty);
+  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,
+                        /*isInit=*/true);
+
+  // Block checks if loop counter exceeds upper bound.
+  CGF.EmitBlock(LoopBB);
+  llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
+  llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
+  CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
+
+  // Block tests which single thread should execute region, and which threads
+  // should go straight to synchronisation point.
+  CGF.EmitBlock(TestBB);
+  CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
+  llvm::Value *CmpThreadToCounter =
+      CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
+  CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
+
+  // Block emits the body of the critical region.
+  CGF.EmitBlock(BodyBB);
+
+  // Output the critical statement. The host-style lock from the base class is
+  // still taken so nested/cross-team accesses remain serialized.
+  CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,
+                                      Hint);
+
+  // After the body surrounded by the critical region, the single executing
+  // thread will jump to the synchronisation point.
+  // Block waits for all threads in current team to finish then increments the
+  // counter variable and returns to the loop.
+  CGF.EmitBlock(SyncBB);
+  // Reconverge active threads in the warp.
+  (void)CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
+
+  llvm::Value *IncCounterVal =
+      CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
+  CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal);
+  CGF.EmitBranch(LoopBB);
+
+  // Block that is reached when all threads in the team complete the region.
+  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
+
+/// Cast value \p Val of type \p ValTy to \p CastTy, choosing the cheapest
+/// correct lowering: no-op, bitcast, integer cast, or a store/load through a
+/// temporary for mismatched sizes. (Fixes the ungrammatical assert messages:
+/// "must sized" -> "must be sized".)
+static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
+                                    QualType ValTy, QualType CastTy,
+                                    SourceLocation Loc) {
+  assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() &&
+         "Cast type must be sized.");
+  assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() &&
+         "Val type must be sized.");
+  llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy);
+  // Identical types: nothing to do.
+  if (ValTy == CastTy)
+    return Val;
+  // Same storage size: reinterpret the bits directly.
+  if (CGF.getContext().getTypeSizeInChars(ValTy) ==
+      CGF.getContext().getTypeSizeInChars(CastTy))
+    return CGF.Builder.CreateBitCast(Val, LLVMCastTy);
+  // Integer-to-integer conversion honoring the destination's signedness.
+  if (CastTy->isIntegerType() && ValTy->isIntegerType())
+    return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
+                                     CastTy->hasSignedIntegerRepresentation());
+  // General case: spill to a temporary of the destination type, reinterpret
+  // the slot as the source type for the store, and load back as CastTy.
+  Address CastItem = CGF.CreateMemTemp(CastTy);
+  Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
+  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy);
+  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc);
+}
+
+/// This function creates calls to one of two shuffle functions to copy
+/// variables between lanes in a warp.
+/// \param Elem     value to shuffle; its type must be at most 8 bytes wide.
+/// \param ElemType source-language type of \p Elem.
+/// \param Offset   lane offset argument forwarded to the runtime shuffle.
+static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
+                                                 llvm::Value *Elem,
+                                                 QualType ElemType,
+                                                 llvm::Value *Offset,
+                                                 SourceLocation Loc) {
+  CodeGenModule &CGM = CGF.CGM;
+  CGBuilderTy &Bld = CGF.Builder;
+  CGOpenMPRuntimeGPU &RT =
+      *(static_cast(&CGM.getOpenMPRuntime()));
+
+  CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
+  assert(Size.getQuantity() <= 8 &&
+         "Unsupported bitwidth in shuffle instruction.");
+
+  // Values up to 4 bytes use the 32-bit shuffle; wider ones the 64-bit one.
+  OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
+                                         ? OMPRTL_NVPTX__kmpc_shuffle_int32
+                                         : OMPRTL_NVPTX__kmpc_shuffle_int64;
+
+  // Cast all types to 32- or 64-bit values before calling shuffle routines.
+  QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
+      Size.getQuantity() <= 4 ? 32 : 64, /*Signed=*/1);
+  llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc);
+  llvm::Value *WarpSize =
+      Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
+
+  llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
+      RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
+
+  // Convert the shuffled integer back to the original element type.
+  return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
+}
+
+/// Copy an element of type \p ElemType from \p SrcAddr to \p DestAddr across
+/// warp lanes. Data wider than 8 bytes is moved in progressively smaller
+/// integer chunks (8, 4, 2, 1 bytes), each moved via a runtime shuffle call.
+static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
+                            Address DestAddr, QualType ElemType,
+                            llvm::Value *Offset, SourceLocation Loc) {
+  CGBuilderTy &Bld = CGF.Builder;
+
+  CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
+  // Create the loop over the big sized data.
+  // ptr = (void*)Elem;
+  // ptrEnd = (void*) Elem + 1;
+  // Step = 8;
+  // while (ptr + Step < ptrEnd)
+  //   shuffle((int64_t)*ptr);
+  // Step = 4;
+  // while (ptr + Step < ptrEnd)
+  //   shuffle((int32_t)*ptr);
+  // ...
+  Address ElemPtr = DestAddr;
+  Address Ptr = SrcAddr;
+  Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      Bld.CreateConstGEP(SrcAddr, 1), CGF.VoidPtrTy);
+  for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
+    // Skip chunk sizes larger than what remains to be copied.
+    if (Size < CharUnits::fromQuantity(IntSize))
+      continue;
+    QualType IntType = CGF.getContext().getIntTypeForBitwidth(
+        CGF.getContext().toBits(CharUnits::fromQuantity(IntSize)),
+        /*Signed=*/1);
+    llvm::Type *IntTy = CGF.ConvertTypeForMem(IntType);
+    Ptr = Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr, IntTy->getPointerTo());
+    ElemPtr =
+        Bld.CreatePointerBitCastOrAddrSpaceCast(ElemPtr, IntTy->getPointerTo());
+    if (Size.getQuantity() / IntSize > 1) {
+      // Multiple chunks of this size: emit a loop with PHIs advancing the
+      // source and destination pointers until fewer than IntSize bytes remain.
+      llvm::BasicBlock *PreCondBB = CGF.createBasicBlock(".shuffle.pre_cond");
+      llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".shuffle.then");
+      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".shuffle.exit");
+      llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
+      CGF.EmitBlock(PreCondBB);
+      llvm::PHINode *PhiSrc =
+          Bld.CreatePHI(Ptr.getType(), /*NumReservedValues=*/2);
+      PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB);
+      llvm::PHINode *PhiDest =
+          Bld.CreatePHI(ElemPtr.getType(), /*NumReservedValues=*/2);
+      PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB);
+      Ptr = Address(PhiSrc, Ptr.getAlignment());
+      ElemPtr = Address(PhiDest, ElemPtr.getAlignment());
+      // Continue while at least IntSize bytes remain before PtrEnd.
+      llvm::Value *PtrDiff = Bld.CreatePtrDiff(
+          PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast(
+                                   Ptr.getPointer(), CGF.VoidPtrTy));
+      Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
+                       ThenBB, ExitBB);
+      CGF.EmitBlock(ThenBB);
+      llvm::Value *Res = createRuntimeShuffleFunction(
+          CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
+          IntType, Offset, Loc);
+      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
+      Address LocalPtr = Bld.CreateConstGEP(Ptr, 1);
+      Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
+      PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
+      PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
+      CGF.EmitBranch(PreCondBB);
+      CGF.EmitBlock(ExitBB);
+    } else {
+      // Exactly one chunk of this size: shuffle and advance without a loop.
+      llvm::Value *Res = createRuntimeShuffleFunction(
+          CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
+          IntType, Offset, Loc);
+      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
+      Ptr = Bld.CreateConstGEP(Ptr, 1);
+      ElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
+    }
+    // Only the remainder is left for the smaller chunk sizes.
+    Size = Size % IntSize;
+  }
+}
+
+namespace {
+// Direction/kind of a Reduce-list element copy performed by
+// emitReductionListCopy.
+enum CopyAction : unsigned {
+  // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
+  // the warp using shuffle instructions.
+  RemoteLaneToThread,
+  // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
+  ThreadCopy,
+  // ThreadToScratchpad: Copy a team-reduced array to the scratchpad.
+  ThreadToScratchpad,
+  // ScratchpadToThread: Copy from a scratchpad array in global memory
+  // containing team-reduced data to a thread's stack.
+  ScratchpadToThread,
+};
+} // namespace
+
+/// Optional parameters for a Reduce-list copy; members not needed by the
+/// chosen CopyAction are null.
+struct CopyOptionsTy {
+  // Lane offset for warp-shuffle copies (RemoteLaneToThread).
+  llvm::Value *RemoteLaneOffset;
+  // Element index into the scratchpad for scratchpad-directed copies.
+  llvm::Value *ScratchpadIndex;
+  // Width of the scratchpad array — presumably used by the scratchpad
+  // addressing outside this excerpt; confirm against emitReductionListCopy
+  // callers.
+  llvm::Value *ScratchpadWidth;
+};
+
+/// Emit instructions to copy a Reduce list, which contains partially
+/// aggregated values, in the specified direction.
+static void emitReductionListCopy(
+    CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy,
+    ArrayRef Privates, Address SrcBase, Address DestBase,
+    CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) {
+
+  CodeGenModule &CGM = CGF.CGM;
+  ASTContext &C = CGM.getContext();
+  CGBuilderTy &Bld = CGF.Builder;
+
+  llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
+  llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
+  llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
+
+  // Iterates, element-by-element, through the source Reduce list and
+  // make a copy.
+  unsigned Idx = 0;
+  unsigned Size = Privates.size();
+  for (const Expr *Private : Privates) {
+    Address SrcElementAddr = Address::invalid();
+    Address DestElementAddr = Address::invalid();
+    Address DestElementPtrAddr = Address::invalid();
+    // Should we shuffle in an element from a remote lane?
+    bool ShuffleInElement = false;
+    // Set to true to update the pointer in the dest Reduce list to a
+    // newly created element.
+    bool UpdateDestListPtr = false;
+    // Increment the src or dest pointer to the scratchpad, for each
+    // new element.
+    bool IncrScratchpadSrc = false;
+    bool IncrScratchpadDest = false;
+
+    switch (Action) {
+    case RemoteLaneToThread: {
+      // Step 1.1: Get the address for the src element in the Reduce list.
+      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
+      SrcElementAddr = CGF.EmitLoadOfPointer(
+          SrcElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs());
+
+      // Step 1.2: Create a temporary to store the element in the destination
+      // Reduce list.
+      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
+      DestElementAddr =
+          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
+      ShuffleInElement = true;
+      UpdateDestListPtr = true;
+      break;
+    }
+    case ThreadCopy: {
+      // Step 1.1: Get the address for the src element in the Reduce list.
+      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
+      SrcElementAddr = CGF.EmitLoadOfPointer(
+          SrcElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs());
+
+      // Step 1.2: Get the address for dest element.  The destination
+      // element has already been created on the thread's stack.
+      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
+      DestElementAddr = CGF.EmitLoadOfPointer(
+          DestElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs());
+      break;
+    }
+    case ThreadToScratchpad: {
+      // Step 1.1: Get the address for the src element in the Reduce list.
+      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
+      SrcElementAddr = CGF.EmitLoadOfPointer(
+          SrcElementPtrAddr,
+          C.getPointerType(Private->getType())->castAs());
+
+      // Step 1.2: Get the address for dest element:
+      // address = base + index * ElementSizeInChars.
+      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+      llvm::Value *CurrentOffset =
+          Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
+      llvm::Value *ScratchPadElemAbsolutePtrVal =
+          Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
+      ScratchPadElemAbsolutePtrVal =
+          Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
+      DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+                                C.getTypeAlignInChars(Private->getType()));
+      IncrScratchpadDest = true;
+      break;
+    }
+    case ScratchpadToThread: {
+      // Step 1.1: Get the address for the src element in the scratchpad.
+      // address = base + index * ElementSizeInChars.
+      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+      llvm::Value *CurrentOffset =
+          Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
+      llvm::Value *ScratchPadElemAbsolutePtrVal =
+          Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
+      ScratchPadElemAbsolutePtrVal =
+          Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
+      SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+                               C.getTypeAlignInChars(Private->getType()));
+      IncrScratchpadSrc = true;
+
+      // Step 1.2: Create a temporary to store the element in the destination
+      // Reduce list.
+      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
+      DestElementAddr =
+          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
+      UpdateDestListPtr = true;
+      break;
+    }
+    }
+
+    // Regardless of src and dest of copy, we emit the load of src
+    // element as this is required in all directions
+    SrcElementAddr = Bld.CreateElementBitCast(
+        SrcElementAddr, CGF.ConvertTypeForMem(Private->getType()));
+    DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
+                                               SrcElementAddr.getElementType());
+
+    // Now that all active lanes have read the element in the
+    // Reduce list, shuffle over the value from the remote lane.
+    if (ShuffleInElement) {
+      shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(),
+                      RemoteLaneOffset, Private->getExprLoc());
+    } else {
+      switch (CGF.getEvaluationKind(Private->getType())) {
+      case TEK_Scalar: {
+        llvm::Value *Elem =
+            CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
+                                 Private->getType(), Private->getExprLoc());
+        // Store the source element value to the dest element address.
+        CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
+                              Private->getType());
+        break;
+      }
+      case TEK_Complex: {
+        CodeGenFunction::ComplexPairTy Elem = CGF.EmitLoadOfComplex(
+            CGF.MakeAddrLValue(SrcElementAddr, Private->getType()),
+            Private->getExprLoc());
+        CGF.EmitStoreOfComplex(
+            Elem, CGF.MakeAddrLValue(DestElementAddr, Private->getType()),
+            /*isInit=*/false);
+        break;
+      }
+      case TEK_Aggregate:
+        CGF.EmitAggregateCopy(
+            CGF.MakeAddrLValue(DestElementAddr, Private->getType()),
+            CGF.MakeAddrLValue(SrcElementAddr, Private->getType()),
+            Private->getType(), AggValueSlot::DoesNotOverlap);
+        break;
+      }
+    }
+
+    // Step 3.1: Modify reference in dest Reduce list as needed.
+    // Modifying the reference in Reduce list to point to the newly
+    // created element.  The element is live in the current function
+    // scope and that of functions it invokes (i.e., reduce_function).
+    // RemoteReduceData[i] = (void*)&RemoteElem
+    if (UpdateDestListPtr) {
+      CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast(
+                                DestElementAddr.getPointer(), CGF.VoidPtrTy),
+                            DestElementPtrAddr, /*Volatile=*/false,
+                            C.VoidPtrTy);
+    }
+
+    // Step 4.1: Increment SrcBase/DestBase so that it points to the starting
+    // address of the next element in scratchpad memory, unless we're currently
+    // processing the last one.  Memory alignment is also taken care of here.
+    if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
+      llvm::Value *ScratchpadBasePtr =
+          IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
+      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+      ScratchpadBasePtr = Bld.CreateNUWAdd(
+          ScratchpadBasePtr,
+          Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
+
+      // Take care of global memory alignment for performance
+      ScratchpadBasePtr = Bld.CreateNUWSub(
+          ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+      ScratchpadBasePtr = Bld.CreateUDiv(
+          ScratchpadBasePtr,
+          llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
+      ScratchpadBasePtr = Bld.CreateNUWAdd(
+          ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+      ScratchpadBasePtr = Bld.CreateNUWMul(
+          ScratchpadBasePtr,
+          llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
+
+      if (IncrScratchpadDest)
+        DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
+      else /* IncrScratchpadSrc = true */
+        SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
+    }
+
+    ++Idx;
+  }
+}
+
+/// This function emits a helper that gathers Reduce lists from the first
+/// lane of every active warp to lanes in the first warp.
+///
+/// void inter_warp_copy_func(void* reduce_data, num_warps)
+///   shared smem[warp_size];
+///   For all data entries D in reduce_data:
+///     sync
+///     If (I am the first lane in each warp)
+///       Copy my local D to smem[warp_id]
+///     sync
+///     if (I am the first warp)
+///       Copy smem[thread_id] to my local D
+static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
+                                              ArrayRef<const Expr *> Privates,
+                                              QualType ReductionArrayTy,
+                                              SourceLocation Loc) {
+  ASTContext &C = CGM.getContext();
+  llvm::Module &M = CGM.getModule();
+
+  // ReduceList: thread local Reduce list.
+  // At the stage of the computation when this function is called, partially
+  // aggregated values reside in the first lane of every active warp.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  // NumWarps: number of warps active in the parallel region.  This could
+  // be smaller than 32 (max warps in a CTA) for partial block reduction.
+  ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                C.getIntTypeForBitwidth(32, /* Signed */ true),
+                                ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&ReduceListArg);
+  Args.push_back(&NumWarpsArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
+                                    llvm::GlobalValue::InternalLinkage,
+                                    "_omp_reduction_inter_warp_copy_func", &M);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  // This array is used as a medium to transfer, one reduce element at a time,
+  // the data from the first lane of every warp to lanes in the first warp
+  // in order to perform the final step of a reduction in a parallel region
+  // (reduction across warps).  The array is placed in NVPTX __shared__ memory
+  // for reduced latency, as well as to have a distinct copy for concurrently
+  // executing target regions.  The array is declared with common linkage so
+  // as to be shared across compilation units.
+  StringRef TransferMediumName =
+      "__openmp_nvptx_data_transfer_temporary_storage";
+  llvm::GlobalVariable *TransferMedium =
+      M.getGlobalVariable(TransferMediumName);
+  if (!TransferMedium) {
+    auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
+    unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
+    TransferMedium = new llvm::GlobalVariable(
+        M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
+        llvm::Constant::getNullValue(Ty), TransferMediumName,
+        /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+        SharedAddressSpace);
+    CGM.addCompilerUsedGlobal(TransferMedium);
+  }
+
+  // Get the CUDA thread id of the current OpenMP thread on the GPU.
+  llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+  // nvptx_lane_id = nvptx_id % warpsize
+  llvm::Value *LaneID = getNVPTXLaneID(CGF);
+  // nvptx_warp_id = nvptx_id / warpsize
+  llvm::Value *WarpID = getNVPTXWarpID(CGF);
+
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  Address LocalReduceList(
+      Bld.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+                               C.VoidPtrTy, Loc),
+          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+      CGF.getPointerAlign());
+
+  unsigned Idx = 0;
+  for (const Expr *Private : Privates) {
+    //
+    // Warp master copies reduce element to transfer medium in __shared__
+    // memory.
+    //
+    unsigned RealTySize =
+        C.getTypeSizeInChars(Private->getType())
+            .alignTo(C.getTypeAlignInChars(Private->getType()))
+            .getQuantity();
+    // Copy the element in chunks of decreasing power-of-two widths (4, 2, 1
+    // bytes), reusing the 32-bit shared-memory slot for each chunk.
+    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
+      unsigned NumIters = RealTySize / TySize;
+      if (NumIters == 0)
+        continue;
+      QualType CType = C.getIntTypeForBitwidth(
+          C.toBits(CharUnits::fromQuantity(TySize)), /*Signed=*/1);
+      llvm::Type *CopyType = CGF.ConvertTypeForMem(CType);
+      CharUnits Align = CharUnits::fromQuantity(TySize);
+      llvm::Value *Cnt = nullptr;
+      Address CntAddr = Address::invalid();
+      llvm::BasicBlock *PrecondBB = nullptr;
+      llvm::BasicBlock *ExitBB = nullptr;
+      if (NumIters > 1) {
+        CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr");
+        CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr,
+                              /*Volatile=*/false, C.IntTy);
+        PrecondBB = CGF.createBasicBlock("precond");
+        ExitBB = CGF.createBasicBlock("exit");
+        llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body");
+        // There is no need to emit line number for unconditional branch.
+        (void)ApplyDebugLocation::CreateEmpty(CGF);
+        CGF.EmitBlock(PrecondBB);
+        Cnt = CGF.EmitLoadOfScalar(CntAddr, /*Volatile=*/false, C.IntTy, Loc);
+        llvm::Value *Cmp =
+            Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters));
+        Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
+        CGF.EmitBlock(BodyBB);
+      }
+      // kmpc_barrier.
+      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+                                             /*EmitChecks=*/false,
+                                             /*ForceSimpleCall=*/true);
+      llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+      llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+      llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+
+      // if (lane_id == 0)
+      llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
+      Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
+      CGF.EmitBlock(ThenBB);
+
+      // Reduce element = LocalReduceList[i]
+      Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
+      llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+          ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+      // elemptr = ((CopyType*)(elemptrptr)) + I
+      Address ElemPtr = Address(ElemPtrPtr, Align);
+      ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType);
+      if (NumIters > 1) {
+        ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt),
+                          ElemPtr.getAlignment());
+      }
+
+      // Get pointer to location in transfer medium.
+      // MediumPtr = &medium[warp_id]
+      llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
+          TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
+      Address MediumPtr(MediumPtrVal, Align);
+      // Casting to actual data type.
+      // MediumPtr = (CopyType*)MediumPtrAddr;
+      MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType);
+
+      // elem = *elemptr
+      //*MediumPtr = elem
+      llvm::Value *Elem =
+          CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc);
+      // Store the source element value to the dest element address.
+      CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType);
+
+      Bld.CreateBr(MergeBB);
+
+      CGF.EmitBlock(ElseBB);
+      Bld.CreateBr(MergeBB);
+
+      CGF.EmitBlock(MergeBB);
+
+      // kmpc_barrier.
+      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+                                             /*EmitChecks=*/false,
+                                             /*ForceSimpleCall=*/true);
+
+      //
+      // Warp 0 copies reduce element from transfer medium.
+      //
+      llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
+      llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
+      llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
+
+      Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
+      llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
+          AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc);
+
+      // Up to 32 threads in warp 0 are active.
+      llvm::Value *IsActiveThread =
+          Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
+      Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
+
+      CGF.EmitBlock(W0ThenBB);
+
+      // SrcMediumPtr = &medium[tid]
+      llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
+          TransferMedium,
+          {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
+      Address SrcMediumPtr(SrcMediumPtrVal, Align);
+      // SrcMediumVal = *SrcMediumPtr;
+      SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType);
+
+      // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I
+      Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
+      llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
+          TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc);
+      Address TargetElemPtr = Address(TargetElemPtrVal, Align);
+      TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType);
+      if (NumIters > 1) {
+        TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt),
+                                TargetElemPtr.getAlignment());
+      }
+
+      // *TargetElemPtr = SrcMediumVal;
+      llvm::Value *SrcMediumValue =
+          CGF.EmitLoadOfScalar(SrcMediumPtr, /*Volatile=*/true, CType, Loc);
+      CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
+                            CType);
+      Bld.CreateBr(W0MergeBB);
+
+      CGF.EmitBlock(W0ElseBB);
+      Bld.CreateBr(W0MergeBB);
+
+      CGF.EmitBlock(W0MergeBB);
+
+      if (NumIters > 1) {
+        Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1));
+        CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy);
+        CGF.EmitBranch(PrecondBB);
+        (void)ApplyDebugLocation::CreateEmpty(CGF);
+        CGF.EmitBlock(ExitBB);
+      }
+      RealTySize %= TySize;
+    }
+    ++Idx;
+  }
+
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// Emit a helper that reduces data across two OpenMP threads (lanes)
+/// in the same warp.  It uses shuffle instructions to copy over data from
+/// a remote lane's stack.  The reduction algorithm performed is specified
+/// by the fourth parameter.
+///
+/// Algorithm Versions.
+/// Full Warp Reduce (argument value 0):
+///   This algorithm assumes that all 32 lanes are active and gathers
+///   data from these 32 lanes, producing a single resultant value.
+/// Contiguous Partial Warp Reduce (argument value 1):
+///   This algorithm assumes that only a *contiguous* subset of lanes
+///   are active.  This happens for the last warp in a parallel region
+///   when the user specified num_threads is not an integer multiple of
+///   32.  This contiguous subset always starts with the zeroth lane.
+/// Partial Warp Reduce (argument value 2):
+///   This algorithm gathers data from any number of lanes at any position.
+/// All reduced values are stored in the lowest possible lane.  The set
+/// of problems every algorithm addresses is a super set of those
+/// addressable by algorithms with a lower version number.  Overhead
+/// increases as algorithm version increases.
+///
+/// Terminology
+/// Reduce element:
+///   Reduce element refers to the individual data field with primitive
+///   data types to be combined and reduced across threads.
+/// Reduce list:
+///   Reduce list refers to a collection of local, thread-private
+///   reduce elements.
+/// Remote Reduce list:
+///   Remote Reduce list refers to a collection of remote (relative to
+///   the current thread) reduce elements.
+///
+/// We distinguish between three states of threads that are important to
+/// the implementation of this function.
+/// Alive threads:
+///   Threads in a warp executing the SIMT instruction, as distinguished from
+///   threads that are inactive due to divergent control flow.
+/// Active threads:
+///   The minimal set of threads that has to be alive upon entry to this
+///   function.  The computation is correct iff active threads are alive.
+///   Some threads are alive but they are not active because they do not
+///   contribute to the computation in any useful manner.  Turning them off
+///   may introduce control flow overheads without any tangible benefits.
+/// Effective threads:
+///   In order to comply with the argument requirements of the shuffle
+///   function, we must keep all lanes holding data alive.  But at most
+///   half of them perform value aggregation; we refer to this half of
+///   threads as effective. The other half is simply handing off their
+///   data.
+///
+/// Procedure
+/// Value shuffle:
+///   In this step active threads transfer data from higher lane positions
+///   in the warp to lower lane positions, creating Remote Reduce list.
+/// Value aggregation:
+///   In this step, effective threads combine their thread local Reduce list
+///   with Remote Reduce list and store the result in the thread local
+///   Reduce list.
+/// Value copy:
+///   In this step, we deal with the assumption made by algorithm 2
+///   (i.e. contiguity assumption).  When we have an odd number of lanes
+///   active, say 2k+1, only k threads will be effective and therefore k
+///   new values will be produced.  However, the Reduce list owned by the
+///   (2k+1)th thread is ignored in the value aggregation.  Therefore
+///   we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
+///   that the contiguity assumption still holds.
+static llvm::Function *emitShuffleAndReduceFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc) {
+  ASTContext &C = CGM.getContext();
+
+  // Thread local Reduce list used to host the values of data to be reduced.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  // Current lane id; could be logical.
+  ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy,
+                              ImplicitParamDecl::Other);
+  // Offset of the remote source lane relative to the current lane.
+  ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                        C.ShortTy, ImplicitParamDecl::Other);
+  // Algorithm version.  This is expected to be known at compile time.
+  ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                               C.ShortTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&ReduceListArg);
+  Args.push_back(&LaneIDArg);
+  Args.push_back(&RemoteLaneOffsetArg);
+  Args.push_back(&AlgoVerArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  if (CGM.getLangOpts().Optimize) {
+    Fn->removeFnAttr(llvm::Attribute::NoInline);
+    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+  }
+
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  Address LocalReduceList(
+      Bld.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+                               C.VoidPtrTy, SourceLocation()),
+          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+      CGF.getPointerAlign());
+
+  Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg);
+  llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar(
+      AddrLaneIDArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+  Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg);
+  llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar(
+      AddrRemoteLaneOffsetArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+  Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg);
+  llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar(
+      AddrAlgoVerArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
+
+  // Create a local thread-private variable to host the Reduce list
+  // from a remote lane.
+  Address RemoteReduceList =
+      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list");
+
+  // This loop iterates through the list of reduce elements and copies,
+  // element by element, from a remote lane in the warp to RemoteReduceList,
+  // hosted on the thread's stack.
+  emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates,
+                        LocalReduceList, RemoteReduceList,
+                        {/*RemoteLaneOffset=*/RemoteLaneOffsetArgVal,
+                         /*ScratchpadIndex=*/nullptr,
+                         /*ScratchpadWidth=*/nullptr});
+
+  // The actions to be performed on the Remote Reduce list is dependent
+  // on the algorithm version.
+  //
+  //  if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 &&
+  //  LaneId % 2 == 0 && Offset > 0):
+  //    do the reduction value aggregation
+  //
+  //  The thread local variable Reduce list is mutated in place to host the
+  //  reduced data, which is the aggregated value produced from local and
+  //  remote lanes.
+  //
+  //  Note that AlgoVer is expected to be a constant integer known at compile
+  //  time.
+  //  When AlgoVer==0, the first conjunction evaluates to true, making
+  //    the entire predicate true during compile time.
+  //  When AlgoVer==1, the second conjunction has only the second part to be
+  //    evaluated during runtime.  Other conjunctions evaluates to false
+  //    during compile time.
+  //  When AlgoVer==2, the third conjunction has only the second part to be
+  //    evaluated during runtime.  Other conjunctions evaluates to false
+  //    during compile time.
+  llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
+
+  llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+  llvm::Value *CondAlgo1 = Bld.CreateAnd(
+      Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
+
+  llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
+  llvm::Value *CondAlgo2 = Bld.CreateAnd(
+      Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
+  CondAlgo2 = Bld.CreateAnd(
+      CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
+
+  llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
+  CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
+
+  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+  llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+  Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
+
+  CGF.EmitBlock(ThenBB);
+  // reduce_function(LocalReduceList, RemoteReduceList)
+  llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      LocalReduceList.getPointer(), CGF.VoidPtrTy);
+  llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      RemoteReduceList.getPointer(), CGF.VoidPtrTy);
+  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+      CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
+  Bld.CreateBr(MergeBB);
+
+  CGF.EmitBlock(ElseBB);
+  Bld.CreateBr(MergeBB);
+
+  CGF.EmitBlock(MergeBB);
+
+  // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
+  // Reduce list.
+  Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+  llvm::Value *CondCopy = Bld.CreateAnd(
+      Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
+
+  llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then");
+  llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else");
+  llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont");
+  Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
+
+  CGF.EmitBlock(CpyThenBB);
+  emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
+                        RemoteReduceList, LocalReduceList);
+  Bld.CreateBr(CpyMergeBB);
+
+  CGF.EmitBlock(CpyElseBB);
+  Bld.CreateBr(CpyMergeBB);
+
+  CGF.EmitBlock(CpyMergeBB);
+
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// This function emits a helper that copies all the reduction variables from
+/// the team into the provided global buffer for the reduction variables.
+///
+/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
+///   For all data entries D in reduce_data:
+///     Copy local D to buffer.D[Idx]
+static llvm::Value *emitListToGlobalCopyFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, SourceLocation Loc,
+    const RecordDecl *TeamReductionRec,
+    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &VarFieldMap) {
+  ASTContext &C = CGM.getContext();
+
+  // Buffer: global reduction buffer.
+  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                              C.VoidPtrTy, ImplicitParamDecl::Other);
+  // Idx: index of the buffer.
+  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+                           ImplicitParamDecl::Other);
+  // ReduceList: thread local Reduce list.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&BufferArg);
+  Args.push_back(&IdxArg);
+  Args.push_back(&ReduceListArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      "_omp_reduction_list_to_global_copy_func", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
+  Address LocalReduceList(
+      Bld.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+                               C.VoidPtrTy, Loc),
+          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+      CGF.getPointerAlign());
+  QualType StaticTy = C.getRecordType(TeamReductionRec);
+  llvm::Type *LLVMReductionsBufferTy =
+      CGM.getTypes().ConvertTypeForMem(StaticTy);
+  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
+      LLVMReductionsBufferTy->getPointerTo());
+  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
+                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+                                              /*Volatile=*/false, C.IntTy,
+                                              Loc)};
+  unsigned Idx = 0;
+  for (const Expr *Private : Privates) {
+    // Reduce element = LocalReduceList[i]
+    Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
+    llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+        ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+    // elemptr = ((CopyType*)(elemptrptr)) + I
+    ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+        ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo());
+    Address ElemPtr =
+        Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
+    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
+    // Global = Buffer.VD[Idx];
+    const FieldDecl *FD = VarFieldMap.lookup(VD);
+    LValue GlobLVal = CGF.EmitLValueForField(
+        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+    llvm::Value *BufferPtr =
+        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
+    GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment()));
+    switch (CGF.getEvaluationKind(Private->getType())) {
+    case TEK_Scalar: {
+      llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
+                                            Private->getType(), Loc);
+      CGF.EmitStoreOfScalar(V, GlobLVal);
+      break;
+    }
+    case TEK_Complex: {
+      CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(
+          CGF.MakeAddrLValue(ElemPtr, Private->getType()), Loc);
+      CGF.EmitStoreOfComplex(V, GlobLVal, /*isInit=*/false);
+      break;
+    }
+    case TEK_Aggregate:
+      CGF.EmitAggregateCopy(GlobLVal,
+                            CGF.MakeAddrLValue(ElemPtr, Private->getType()),
+                            Private->getType(), AggValueSlot::DoesNotOverlap);
+      break;
+    }
+    ++Idx;
+  }
+
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// This function emits a helper that reduces all the reduction variables from
+/// the team into the provided global buffer for the reduction variables.
+///
+/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data)
+///  void *GlobPtrs[];
+///  GlobPtrs[0] = (void*)&buffer.D0[Idx];
+///  ...
+///  GlobPtrs[N] = (void*)&buffer.DN[Idx];
+///  reduce_function(GlobPtrs, reduce_data);
+static llvm::Value *emitListToGlobalReduceFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, SourceLocation Loc,
+    const RecordDecl *TeamReductionRec,
+    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &VarFieldMap,
+    llvm::Function *ReduceFn) {
+  ASTContext &C = CGM.getContext();
+
+  // Buffer: global reduction buffer.
+  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                              C.VoidPtrTy, ImplicitParamDecl::Other);
+  // Idx: index of the buffer.
+  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+                           ImplicitParamDecl::Other);
+  // ReduceList: thread local Reduce list.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&BufferArg);
+  Args.push_back(&IdxArg);
+  Args.push_back(&ReduceListArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      "_omp_reduction_list_to_global_reduce_func", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
+  QualType StaticTy = C.getRecordType(TeamReductionRec);
+  llvm::Type *LLVMReductionsBufferTy =
+      CGM.getTypes().ConvertTypeForMem(StaticTy);
+  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
+      LLVMReductionsBufferTy->getPointerTo());
+
+  // 1. Build a list of reduction variables.
+  // void *RedList[] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+  Address ReductionList =
+      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+  auto IPriv = Privates.begin();
+  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
+                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+                                              /*Volatile=*/false, C.IntTy,
+                                              Loc)};
+  unsigned Idx = 0;
+  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
+    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+    // Global = Buffer.VD[Idx];
+    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
+    const FieldDecl *FD = VarFieldMap.lookup(VD);
+    LValue GlobLVal = CGF.EmitLValueForField(
+        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+    llvm::Value *BufferPtr =
+        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
+    llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr);
+    CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy);
+    if ((*IPriv)->getType()->isVariablyModifiedType()) {
+      // Store array size.
+      ++Idx;
+      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+      llvm::Value *Size = CGF.Builder.CreateIntCast(
+          CGF.getVLASize(
+                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+              .NumElts,
+          CGF.SizeTy, /*isSigned=*/false);
+      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+                              Elem);
+    }
+  }
+
+  // Call reduce_function(GlobalReduceList, ReduceList)
+  llvm::Value *GlobalReduceList =
+      CGF.EmitCastToVoidPtr(ReductionList.getPointer());
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar(
+      AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
+  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+      CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// This function emits a helper that copies all the reduction variables for a
+/// given team from the provided global buffer back into the thread-local
+/// reduce list.
+///
+/// void global_to_list_copy_func(void *buffer, int Idx, void *reduce_data)
+///   For all data entries D in reduce_data:
+///     Copy buffer.D[Idx] to local D;
+static llvm::Value *emitGlobalToListCopyFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, SourceLocation Loc,
+    const RecordDecl *TeamReductionRec,
+    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &VarFieldMap) {
+  ASTContext &C = CGM.getContext();
+
+  // Buffer: global reduction buffer.
+  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                              C.VoidPtrTy, ImplicitParamDecl::Other);
+  // Idx: index of the buffer.
+  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+                           ImplicitParamDecl::Other);
+  // ReduceList: thread local Reduce list.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&BufferArg);
+  Args.push_back(&IdxArg);
+  Args.push_back(&ReduceListArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      "_omp_reduction_global_to_list_copy_func", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
+  Address LocalReduceList(
+      Bld.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+                               C.VoidPtrTy, Loc),
+          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+      CGF.getPointerAlign());
+  QualType StaticTy = C.getRecordType(TeamReductionRec);
+  llvm::Type *LLVMReductionsBufferTy =
+      CGM.getTypes().ConvertTypeForMem(StaticTy);
+  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
+      LLVMReductionsBufferTy->getPointerTo());
+
+  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
+                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+                                              /*Volatile=*/false, C.IntTy,
+                                              Loc)};
+  unsigned Idx = 0;
+  for (const Expr *Private : Privates) {
+    // Reduce element = LocalReduceList[i]
+    Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
+    llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+        ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+    // elemptr = ((CopyType*)(elemptrptr)) + I
+    ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+        ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo());
+    Address ElemPtr =
+        Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
+    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
+    // Global = Buffer.VD[Idx];
+    const FieldDecl *FD = VarFieldMap.lookup(VD);
+    LValue GlobLVal = CGF.EmitLValueForField(
+        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+    llvm::Value *BufferPtr =
+        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
+    GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment()));
+    switch (CGF.getEvaluationKind(Private->getType())) {
+    case TEK_Scalar: {
+      llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc);
+      CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType());
+      break;
+    }
+    case TEK_Complex: {
+      CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(GlobLVal, Loc);
+      CGF.EmitStoreOfComplex(V, CGF.MakeAddrLValue(ElemPtr, Private->getType()),
+                             /*isInit=*/false);
+      break;
+    }
+    case TEK_Aggregate:
+      CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()),
+                            GlobLVal, Private->getType(),
+                            AggValueSlot::DoesNotOverlap);
+      break;
+    }
+    ++Idx;
+  }
+
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// This function emits a helper that reduces all the reduction variables from
+/// the team into the provided global buffer for the reduction variables.
+///
+/// void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data)
+///  void *GlobPtrs[];
+///  GlobPtrs[0] = (void*)&buffer.D0[Idx];
+///  ...
+///  GlobPtrs[N] = (void*)&buffer.DN[Idx];
+///  reduce_function(reduce_data, GlobPtrs);
+static llvm::Value *emitGlobalToListReduceFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, SourceLocation Loc,
+    const RecordDecl *TeamReductionRec,
+    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &VarFieldMap,
+    llvm::Function *ReduceFn) {
+  ASTContext &C = CGM.getContext();
+
+  // Buffer: global reduction buffer.
+  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                              C.VoidPtrTy, ImplicitParamDecl::Other);
+  // Idx: index of the buffer.
+  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+                           ImplicitParamDecl::Other);
+  // ReduceList: thread local Reduce list.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&BufferArg);
+  Args.push_back(&IdxArg);
+  Args.push_back(&ReduceListArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      "_omp_reduction_global_to_list_reduce_func", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
+  QualType StaticTy = C.getRecordType(TeamReductionRec);
+  llvm::Type *LLVMReductionsBufferTy =
+      CGM.getTypes().ConvertTypeForMem(StaticTy);
+  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
+      LLVMReductionsBufferTy->getPointerTo());
+
+  // 1. Build a list of reduction variables.
+  // void *RedList[] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+  Address ReductionList =
+      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+  auto IPriv = Privates.begin();
+  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
+                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+                                              /*Volatile=*/false, C.IntTy,
+                                              Loc)};
+  unsigned Idx = 0;
+  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
+    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+    // Global = Buffer.VD[Idx];
+    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
+    const FieldDecl *FD = VarFieldMap.lookup(VD);
+    LValue GlobLVal = CGF.EmitLValueForField(
+        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+    llvm::Value *BufferPtr =
+        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
+    llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr);
+    CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy);
+    if ((*IPriv)->getType()->isVariablyModifiedType()) {
+      // Store array size.
+      ++Idx;
+      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+      llvm::Value *Size = CGF.Builder.CreateIntCast(
+          CGF.getVLASize(
+                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+              .NumElts,
+          CGF.SizeTy, /*isSigned=*/false);
+      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+                              Elem);
+    }
+  }
+
+  // Call reduce_function(ReduceList, GlobalReduceList)
+  llvm::Value *GlobalReduceList =
+      CGF.EmitCastToVoidPtr(ReductionList.getPointer());
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar(
+      AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
+  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+      CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList});
+  CGF.FinishFunction();
+  return Fn;
+}
+
+///
+/// Design of OpenMP reductions on the GPU
+///
+/// Consider a typical OpenMP program with one or more reduction
+/// clauses:
+///
+/// float foo;
+/// double bar;
+/// #pragma omp target teams distribute parallel for \
+///             reduction(+:foo) reduction(*:bar)
+/// for (int i = 0; i < N; i++) {
+///   foo += A[i]; bar *= B[i];
+/// }
+///
+/// where 'foo' and 'bar' are reduced across all OpenMP threads in
+/// all teams.  In our OpenMP implementation on the NVPTX device an
+/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads
+/// within a team are mapped to CUDA threads within a threadblock.
+/// Our goal is to efficiently aggregate values across all OpenMP
+/// threads such that:
+///
+///   - the compiler and runtime are logically concise, and
+///   - the reduction is performed efficiently in a hierarchical
+///     manner as follows: within OpenMP threads in the same warp,
+///     across warps in a threadblock, and finally across teams on
+///     the NVPTX device.
+///
+/// Introduction to Decoupling
+///
+/// We would like to decouple the compiler and the runtime so that the
+/// latter is ignorant of the reduction variables (number, data types)
+/// and the reduction operators.  This allows a simpler interface
+/// and implementation while still attaining good performance.
+///
+/// Pseudocode for the aforementioned OpenMP program generated by the
+/// compiler is as follows:
+///
+/// 1. Create private copies of reduction variables on each OpenMP
+///    thread: 'foo_private', 'bar_private'
+/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
+///    to it and writes the result in 'foo_private' and 'bar_private'
+///    respectively.
+/// 3. Call the OpenMP runtime on the GPU to reduce within a team
+///    and store the result on the team master:
+///
+///     __kmpc_nvptx_parallel_reduce_nowait_v2(...,
+///        reduceData, shuffleReduceFn, interWarpCpyFn)
+///
+///     where:
+///       struct ReduceData {
+///         double *foo;
+///         double *bar;
+///       } reduceData
+///       reduceData.foo = &foo_private
+///       reduceData.bar = &bar_private
+///
+///     'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
+///     auxiliary functions generated by the compiler that operate on
+///     variables of type 'ReduceData'.  They aid the runtime perform
+///     algorithmic steps in a data agnostic manner.
+///
+///     'shuffleReduceFn' is a pointer to a function that reduces data
+///     of type 'ReduceData' across two OpenMP threads (lanes) in the
+///     same warp.  It takes the following arguments as input:
+///
+///     a. variable of type 'ReduceData' on the calling lane,
+///     b. its lane_id,
+///     c. an offset relative to the current lane_id to generate a
+///        remote_lane_id.  The remote lane contains the second
+///        variable of type 'ReduceData' that is to be reduced.
+///     d. an algorithm version parameter determining which reduction
+///        algorithm to use.
+///
+///     'shuffleReduceFn' retrieves data from the remote lane using
+///     efficient GPU shuffle intrinsics and reduces, using the
+///     algorithm specified by the 4th parameter, the two operands
+///     element-wise.  The result is written to the first operand.
+///
+///     Different reduction algorithms are implemented in different
+///     runtime functions, all calling 'shuffleReduceFn' to perform
+///     the essential reduction step.  Therefore, based on the 4th
+///     parameter, this function behaves slightly differently to
+///     cooperate with the runtime to ensure correctness under
+///     different circumstances.
+///
+///     'InterWarpCpyFn' is a pointer to a function that transfers
+///     reduced variables across warps.  It tunnels, through CUDA
+///     shared memory, the thread-private data of type 'ReduceData'
+///     from lane 0 of each warp to a lane in the first warp.
+/// 4. Call the OpenMP runtime on the GPU to reduce across teams.
+///    The last team writes the global reduced value to memory.
+///
+///     ret = __kmpc_nvptx_teams_reduce_nowait(...,
+///             reduceData, shuffleReduceFn, interWarpCpyFn,
+///             scratchpadCopyFn, loadAndReduceFn)
+///
+///     'scratchpadCopyFn' is a helper that stores reduced
+///     data from the team master to a scratchpad array in
+///     global memory.
+///
+///     'loadAndReduceFn' is a helper that loads data from
+///     the scratchpad array and reduces it with the input
+///     operand.
+///
+///     These compiler generated functions hide address
+///     calculation and alignment information from the runtime.
+/// 5. if ret == 1:
+///     The team master of the last team stores the reduced
+///     result to the globals in memory.
+///     foo += reduceData.foo; bar *= reduceData.bar
+///
+///
+/// Warp Reduction Algorithms
+///
+/// On the warp level, we have three algorithms implemented in the
+/// OpenMP runtime depending on the number of active lanes:
+///
+/// Full Warp Reduction
+///
+/// The reduce algorithm within a warp where all lanes are active
+/// is implemented in the runtime as follows:
+///
+/// full_warp_reduce(void *reduce_data,
+///                  kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+///   for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
+///     ShuffleReduceFn(reduce_data, 0, offset, 0);
+/// }
+///
+/// The algorithm completes in log(2, WARPSIZE) steps.
+///
+/// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
+/// not used therefore we save instructions by not retrieving lane_id
+/// from the corresponding special registers.  The 4th parameter, which
+/// represents the version of the algorithm being used, is set to 0 to
+/// signify full warp reduction.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// #reduce_elem refers to an element in the local lane's data structure
+/// #remote_elem is retrieved from a remote lane
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// reduce_elem = reduce_elem REDUCE_OP remote_elem;
+///
+/// Contiguous Partial Warp Reduction
+///
+/// This reduce algorithm is used within a warp where only the first
+/// 'n' (n <= WARPSIZE) lanes are active.  It is typically used when the
+/// number of OpenMP threads in a parallel region is not a multiple of
+/// WARPSIZE.  The algorithm is implemented in the runtime as follows:
+///
+/// void
+/// contiguous_partial_reduce(void *reduce_data,
+///                           kmp_ShuffleReductFctPtr ShuffleReduceFn,
+///                           int size, int lane_id) {
+///   int curr_size;
+///   int offset;
+///   curr_size = size;
+///   mask = curr_size/2;
+///   while (offset>0) {
+///     ShuffleReduceFn(reduce_data, lane_id, offset, 1);
+///     curr_size = (curr_size+1)/2;
+///     offset = curr_size/2;
+///   }
+/// }
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id < offset)
+///     reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+///     reduce_elem = remote_elem
+///
+/// This algorithm assumes that the data to be reduced are located in a
+/// contiguous subset of lanes starting from the first.  When there is
+/// an odd number of active lanes, the data in the last lane is not
+/// aggregated with any other lane's data but is instead copied over.
+///
+/// Dispersed Partial Warp Reduction
+///
+/// This algorithm is used within a warp when any discontiguous subset of
+/// lanes are active.  It is used to implement the reduction operation
+/// across lanes in an OpenMP simd region or in a nested parallel region.
+///
+/// void
+/// dispersed_partial_reduce(void *reduce_data,
+///                          kmp_ShuffleReductFctPtr ShuffleReduceFn) {
+///   int size, remote_id;
+///   int logical_lane_id = number_of_active_lanes_before_me() * 2;
+///   do {
+///       remote_id = next_active_lane_id_right_after_me();
+///       # the above function returns 0 if no active lane
+///       # is present right after the current lane.
+///       size = number_of_active_lanes_in_this_warp();
+///       logical_lane_id /= 2;
+///       ShuffleReduceFn(reduce_data, logical_lane_id,
+///                       remote_id-1-threadIdx.x, 2);
+///   } while (logical_lane_id % 2 == 0 && size > 1);
+/// }
+///
+/// There is no assumption made about the initial state of the reduction.
+/// Any number of lanes (>=1) could be active at any position.  The reduction
+/// result is returned in the first active lane.
+///
+/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
+///
+/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
+/// if (lane_id % 2 == 0 && offset > 0)
+///     reduce_elem = reduce_elem REDUCE_OP remote_elem
+/// else
+///     reduce_elem = remote_elem
+///
+///
+/// Intra-Team Reduction
+///
+/// This function, as implemented in the runtime call
+/// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP
+/// threads in a team.  It first reduces within a warp using the
+/// aforementioned algorithms.  We then proceed to gather all such
+/// reduced values at the first warp.
+///
+/// The runtime makes use of the function 'InterWarpCpyFn', which copies
+/// data from each of the "warp master" (zeroth lane of each warp, where
+/// warp-reduced data is held) to the zeroth warp.  This step reduces (in
+/// a mathematical sense) the problem of reduction across warp masters in
+/// a block to the problem of warp reduction.
+///
+///
+/// Inter-Team Reduction
+///
+/// Once a team has reduced its data to a single value, it is stored in
+/// a global scratchpad array.  Since each team has a distinct slot, this
+/// can be done without locking.
+///
+/// The last team to write to the scratchpad array proceeds to reduce the
+/// scratchpad array.  One or more workers in the last team use the helper
+/// 'loadAndReduceDataFn' to load and reduce values from the array, i.e.,
+/// the k'th worker reduces every k'th element.
+///
+/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to
+/// reduce across workers and compute a globally reduced value.
+///
+void CGOpenMPRuntimeGPU::emitReduction(
+    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
+#ifndef NDEBUG
+  bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
+#endif
+
+  if (Options.SimpleReduction) {
+    assert(!TeamsReduction && !ParallelReduction &&
+           "Invalid reduction selection in emitReduction.");
+    CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
+                                   ReductionOps, Options);
+    return;
+  }
+
+  assert((TeamsReduction || ParallelReduction) &&
+         "Invalid reduction selection in emitReduction.");
+
+  // Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+  // RedList, shuffle_reduce_func, interwarp_copy_func);
+  // or
+  // Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>);
+  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+
+  llvm::Value *Res;
+  ASTContext &C = CGM.getContext();
+  // 1. Build a list of reduction variables.
+  // void *RedList[] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+  auto Size = RHSExprs.size();
+  for (const Expr *E : Privates) {
+    if (E->getType()->isVariablyModifiedType())
+      // Reserve place for array size.
+      ++Size;
+  }
+  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
+  QualType ReductionArrayTy =
+      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
+                             /*IndexTypeQuals=*/0);
+  Address ReductionList =
+      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+  auto IPriv = Privates.begin();
+  unsigned Idx = 0;
+  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
+    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+    CGF.Builder.CreateStore(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
+        Elem);
+    if ((*IPriv)->getType()->isVariablyModifiedType()) {
+      // Store array size.
+      ++Idx;
+      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+      llvm::Value *Size = CGF.Builder.CreateIntCast(
+          CGF.getVLASize(
+                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+              .NumElts,
+          CGF.SizeTy, /*isSigned=*/false);
+      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+                              Elem);
+    }
+  }
+
+  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      ReductionList.getPointer(), CGF.VoidPtrTy);
+  llvm::Function *ReductionFn = emitReductionFunction(
+      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
+      LHSExprs, RHSExprs, ReductionOps);
+  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+  llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
+      CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
+  llvm::Value *InterWarpCopyFn =
+      emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
+
+  if (ParallelReduction) {
+    llvm::Value *Args[] = {RTLoc,
+                           ThreadId,
+                           CGF.Builder.getInt32(RHSExprs.size()),
+                           ReductionArrayTySize,
+                           RL,
+                           ShuffleAndReduceFn,
+                           InterWarpCopyFn};
+
+    Res = CGF.EmitRuntimeCall(
+        createNVPTXRuntimeFunction(
+            OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
+        Args);
+  } else {
+    assert(TeamsReduction && "expected teams reduction.");
+    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
+    llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size());
+    int Cnt = 0;
+    for (const Expr *DRE : Privates) {
+      PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
+      ++Cnt;
+    }
+    const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
+        CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap,
+        C.getLangOpts().OpenMPCUDAReductionBufNum);
+    TeamsReductions.push_back(TeamReductionRec);
+    if (!KernelTeamsReductionPtr) {
+      KernelTeamsReductionPtr = new llvm::GlobalVariable(
+          CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
+          llvm::GlobalValue::InternalLinkage, nullptr,
+          "_openmp_teams_reductions_buffer_$_$ptr");
+    }
+    llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar(
+        Address(KernelTeamsReductionPtr, CGM.getPointerAlign()),
+        /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+    llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction(
+        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
+    llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction(
+        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
+        ReductionFn);
+    llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction(
+        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
+    llvm::Value *BufferToGlobalRedFn = ::emitGlobalToListReduceFunction(
+        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
+        ReductionFn);
+
+    llvm::Value *Args[] = {
+        RTLoc,
+        ThreadId,
+        GlobalBufferPtr,
+        CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum),
+        RL,
+        ShuffleAndReduceFn,
+        InterWarpCopyFn,
+        GlobalToBufferCpyFn,
+        GlobalToBufferRedFn,
+        BufferToGlobalCpyFn,
+        BufferToGlobalRedFn};
+
+    Res = CGF.EmitRuntimeCall(
+        createNVPTXRuntimeFunction(
+            OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
+        Args);
+  }
+
+  // 5. Build if (res == 1)
+  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done");
+  llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then");
+  llvm::Value *Cond = CGF.Builder.CreateICmpEQ(
+      Res, llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1));
+  CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB);
+
+  // 6. Build then branch: where we have reduced values in the master
+  //    thread in each team.
+  //    __kmpc_end_reduce{_nowait}(<gtid>);
+  //    break;
+  CGF.EmitBlock(ThenBB);
+
+  // Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
+  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
+                    this](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    auto IPriv = Privates.begin();
+    auto ILHS = LHSExprs.begin();
+    auto IRHS = RHSExprs.begin();
+    for (const Expr *E : ReductionOps) {
+      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+                                  cast<DeclRefExpr>(*IRHS));
+      ++IPriv;
+      ++ILHS;
+      ++IRHS;
+    }
+  };
+  llvm::Value *EndArgs[] = {ThreadId};
+  RegionCodeGenTy RCG(CodeGen);
+  NVPTXActionTy Action(
+      nullptr, llvm::None,
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+      EndArgs);
+  RCG.setAction(Action);
+  RCG(CGF);
+  // There is no need to emit line number for unconditional branch.
+  (void)ApplyDebugLocation::CreateEmpty(CGF);
+  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
+
+const VarDecl *
+CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD,
+                                         const VarDecl *NativeParam) const {
+  // Only reference-typed captures need a device-side replacement parameter.
+  if (!NativeParam->getType()->isReferenceType())
+    return NativeParam;
+  QualType ArgType = NativeParam->getType();
+  QualifierCollector QC;
+  const Type *NonQualTy = QC.strip(ArgType);
+  QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
+  if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) {
+    if (Attr->getCaptureKind() == OMPC_map) {
+      PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,
+                                                        LangAS::opencl_global);
+    } else if (Attr->getCaptureKind() == OMPC_firstprivate &&
+               PointeeTy.isConstant(CGM.getContext())) {
+      PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,
+                                                        LangAS::opencl_generic);
+    }
+  }
+  ArgType = CGM.getContext().getPointerType(PointeeTy);
+  QC.addRestrict();
+  enum { NVPTX_local_addr = 5 };
+  QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr));
+  ArgType = QC.apply(CGM.getContext(), ArgType);
+  if (isa<ImplicitParamDecl>(NativeParam))
+    return ImplicitParamDecl::Create(
+        CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
+        NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
+  return ParmVarDecl::Create(
+      CGM.getContext(),
+      const_cast<DeclContext *>(NativeParam->getDeclContext()),
+      NativeParam->getBeginLoc(), NativeParam->getLocation(),
+      NativeParam->getIdentifier(), ArgType,
+      /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
+}
+
+Address
+CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
+                                          const VarDecl *NativeParam,
+                                          const VarDecl *TargetParam) const {
+  assert(NativeParam != TargetParam &&
+         NativeParam->getType()->isReferenceType() &&
+         "Native arg must not be the same as target arg.");
+  Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);
+  QualType NativeParamType = NativeParam->getType();
+  QualifierCollector QC;
+  const Type *NonQualTy = QC.strip(NativeParamType);
+  QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
+  unsigned NativePointeeAddrSpace =
+      CGF.getContext().getTargetAddressSpace(NativePointeeTy);
+  QualType TargetTy = TargetParam->getType();
+  llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
+      LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
+  // First cast to generic.
+  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
+                      /*AddrSpace=*/0));
+  // Cast from generic to native address space.
+  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
+                      NativePointeeAddrSpace));
+  Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
+  CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
+                        NativeParamType);
+  return NativeParamAddr;
+}
+
+void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
+    ArrayRef Args) const {
+  SmallVector TargetArgs;
+  TargetArgs.reserve(Args.size());
+  auto *FnType = OutlinedFn.getFunctionType();
+  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
+    if (FnType->isVarArg() && FnType->getNumParams() <= I) {
+      TargetArgs.append(std::next(Args.begin(), I), Args.end());
+      break;
+    }
+    llvm::Type *TargetType = FnType->getParamType(I);
+    llvm::Value *NativeArg = Args[I];
+    if (!TargetType->isPointerTy()) {
+      TargetArgs.emplace_back(NativeArg);
+      continue;
+    }
+    llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        NativeArg,
+        NativeArg->getType()->getPointerElementType()->getPointerTo());
+    TargetArgs.emplace_back(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
+  }
+  CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
+}
+
+/// Emit function which wraps the outline parallel region
+/// and controls the arguments which are passed to this function.
+/// The wrapper ensures that the outlined function is called
+/// with the correct arguments when data is shared.
+llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
+    llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {
+  ASTContext &Ctx = CGM.getContext();
+  const auto &CS = *D.getCapturedStmt(OMPD_parallel);
+
+  // Create a function that takes as argument the source thread.
+  FunctionArgList WrapperArgs;
+  QualType Int16QTy =
+      Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);
+  QualType Int32QTy =
+      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
+  ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
+                                     /*Id=*/nullptr, Int16QTy,
+                                     ImplicitParamDecl::Other);
+  ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
+                               /*Id=*/nullptr, Int32QTy,
+                               ImplicitParamDecl::Other);
+  WrapperArgs.emplace_back(&ParallelLevelArg);
+  WrapperArgs.emplace_back(&WrapperArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
+
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+  Fn->setDoesNotRecurse();
+
+  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
+                    D.getBeginLoc(), D.getBeginLoc());
+
+  const auto *RD = CS.getCapturedRecordDecl();
+  auto CurField = RD->field_begin();
+
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  // Get the array of arguments.
+  SmallVector Args;
+
+  Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
+  Args.emplace_back(ZeroAddr.getPointer());
+
+  CGBuilderTy &Bld = CGF.Builder;
+  auto CI = CS.capture_begin();
+
+  // Use global memory for data sharing.
+  // Handle passing of global args to workers.
+  Address GlobalArgs =
+      CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
+  llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
+  llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
+      DataSharingArgs);
+
+  // Retrieve the shared variables from the list of references returned
+  // by the runtime. Pass the variables to the outlined function.
+  Address SharedArgListAddress = Address::invalid();
+  if (CS.capture_size() > 0 ||
+      isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
+    SharedArgListAddress = CGF.EmitLoadOfPointer(
+        GlobalArgs, CGF.getContext()
+                        .getPointerType(CGF.getContext().getPointerType(
+                            CGF.getContext().VoidPtrTy))
+                        .castAs());
+  }
+  unsigned Idx = 0;
+  if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
+    Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
+    Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
+        Src, CGF.SizeTy->getPointerTo());
+    llvm::Value *LB = CGF.EmitLoadOfScalar(
+        TypedAddress,
+        /*Volatile=*/false,
+        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
+        cast(D).getLowerBoundVariable()->getExprLoc());
+    Args.emplace_back(LB);
+    ++Idx;
+    Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
+    TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
+        Src, CGF.SizeTy->getPointerTo());
+    llvm::Value *UB = CGF.EmitLoadOfScalar(
+        TypedAddress,
+        /*Volatile=*/false,
+        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
+        cast(D).getUpperBoundVariable()->getExprLoc());
+    Args.emplace_back(UB);
+    ++Idx;
+  }
+  if (CS.capture_size() > 0) {
+    ASTContext &CGFContext = CGF.getContext();
+    for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
+      QualType ElemTy = CurField->getType();
+      Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx);
+      Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
+          Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)));
+      llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
+                                              /*Volatile=*/false,
+                                              CGFContext.getPointerType(ElemTy),
+                                              CI->getLocation());
+      if (CI->capturesVariableByCopy() &&
+          !CI->getCapturedVar()->getType()->isAnyPointerType()) {
+        Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
+                              CI->getLocation());
+      }
+      Args.emplace_back(Arg);
+    }
+  }
+
+  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
+  CGF.FinishFunction();
+  return Fn;
+}
+
+void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
+                                              const Decl *D) {
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+    return;
+
+  assert(D && "Expected function or captured|block decl.");
+  assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
+         "Function is registered already.");
+  assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
+         "Team is set but not processed.");
+  const Stmt *Body = nullptr;
+  bool NeedToDelayGlobalization = false;
+  if (const auto *FD = dyn_cast(D)) {
+    Body = FD->getBody();
+  } else if (const auto *BD = dyn_cast(D)) {
+    Body = BD->getBody();
+  } else if (const auto *CD = dyn_cast(D)) {
+    Body = CD->getBody();
+    NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
+    if (NeedToDelayGlobalization &&
+        getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
+      return;
+  }
+  if (!Body)
+    return;
+  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
+  VarChecker.Visit(Body);
+  const RecordDecl *GlobalizedVarsRecord =
+      VarChecker.getGlobalizedRecord(IsInTTDRegion);
+  TeamAndReductions.first = nullptr;
+  TeamAndReductions.second.clear();
+  ArrayRef EscapedVariableLengthDecls =
+      VarChecker.getEscapedVariableLengthDecls();
+  if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
+    return;
+  auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+  I->getSecond().MappedParams =
+      std::make_unique();
+  I->getSecond().GlobalRecord = GlobalizedVarsRecord;
+  I->getSecond().EscapedParameters.insert(
+      VarChecker.getEscapedParameters().begin(),
+      VarChecker.getEscapedParameters().end());
+  I->getSecond().EscapedVariableLengthDecls.append(
+      EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
+  DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+  for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+    assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+    const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+    Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
+  }
+  if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
+    CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
+    VarChecker.Visit(Body);
+    I->getSecond().SecondaryGlobalRecord =
+        VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true);
+    I->getSecond().SecondaryLocalVarData.emplace();
+    DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
+    for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+      assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+      const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+      Data.insert(
+          std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true)));
+    }
+  }
+  if (!NeedToDelayGlobalization) {
+    emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
+    struct GlobalizationScope final : EHScopeStack::Cleanup {
+      GlobalizationScope() = default;
+
+      void Emit(CodeGenFunction &CGF, Flags flags) override {
+        static_cast(CGF.CGM.getOpenMPRuntime())
+            .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true);
+      }
+    };
+    CGF.EHStack.pushCleanup(NormalAndEHCleanup);
+  }
+}
+
+Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
+                                                        const VarDecl *VD) {
+  if (VD && VD->hasAttr()) {
+    const auto *A = VD->getAttr();
+    auto AS = LangAS::Default;
+    switch (A->getAllocatorType()) {
+      // Use the default allocator here as by default local vars are
+      // threadlocal.
+    case OMPAllocateDeclAttr::OMPNullMemAlloc:
+    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+      // Follow the user decision - use default allocation.
+      return Address::invalid();
+    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+      // TODO: implement support for user-defined allocators.
+      return Address::invalid();
+    case OMPAllocateDeclAttr::OMPConstMemAlloc:
+      AS = LangAS::cuda_constant;
+      break;
+    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+      AS = LangAS::cuda_shared;
+      break;
+    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+      break;
+    }
+    llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+    auto *GV = new llvm::GlobalVariable(
+        CGM.getModule(), VarTy, /*isConstant=*/false,
+        llvm::GlobalValue::InternalLinkage, llvm::Constant::getNullValue(VarTy),
+        VD->getName(),
+        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+        CGM.getContext().getTargetAddressSpace(AS));
+    CharUnits Align = CGM.getContext().getDeclAlign(VD);
+    GV->setAlignment(Align.getAsAlign());
+    return Address(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            GV, VarTy->getPointerTo(CGM.getContext().getTargetAddressSpace(
+                    VD->getType().getAddressSpace()))),
+        Align);
+  }
+
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+    return Address::invalid();
+
+  VD = VD->getCanonicalDecl();
+  auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+  if (I == FunctionGlobalizedDecls.end())
+    return Address::invalid();
+  auto VDI = I->getSecond().LocalVarData.find(VD);
+  if (VDI != I->getSecond().LocalVarData.end())
+    return VDI->second.PrivateAddr;
+  if (VD->hasAttrs()) {
+    for (specific_attr_iterator IT(VD->attr_begin()),
+         E(VD->attr_end());
+         IT != E; ++IT) {
+      auto VDI = I->getSecond().LocalVarData.find(
+          cast(cast(IT->getRef())->getDecl())
+              ->getCanonicalDecl());
+      if (VDI != I->getSecond().LocalVarData.end())
+        return VDI->second.PrivateAddr;
+    }
+  }
+
+  return Address::invalid();
+}
+
+void CGOpenMPRuntimeGPU::functionFinished(CodeGenFunction &CGF) {
+  FunctionGlobalizedDecls.erase(CGF.CurFn);
+  CGOpenMPRuntime::functionFinished(CGF);
+}
+
+void CGOpenMPRuntimeGPU::getDefaultDistScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPDistScheduleClauseKind &ScheduleKind,
+    llvm::Value *&Chunk) const {
+  if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) {
+    ScheduleKind = OMPC_DIST_SCHEDULE_static;
+    Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
+        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+        S.getIterationVariable()->getType(), S.getBeginLoc());
+    return;
+  }
+  CGOpenMPRuntime::getDefaultDistScheduleAndChunk(
+      CGF, S, ScheduleKind, Chunk);
+}
+
+void CGOpenMPRuntimeGPU::getDefaultScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPScheduleClauseKind &ScheduleKind,
+    const Expr *&ChunkExpr) const {
+  ScheduleKind = OMPC_SCHEDULE_static;
+  // Chunk size is 1 in this case.
+  llvm::APInt ChunkSize(32, 1);
+  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+      SourceLocation());
+}
+
+void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+         " Expected target-based directive.");
+  const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);
+  for (const CapturedStmt::Capture &C : CS->captures()) {
+    // Capture variables captured by reference in lambdas for target-based
+    // directives.
+    if (!C.capturesVariable())
+      continue;
+    const VarDecl *VD = C.getCapturedVar();
+    const auto *RD = VD->getType()
+                         .getCanonicalType()
+                         .getNonReferenceType()
+                         ->getAsCXXRecordDecl();
+    if (!RD || !RD->isLambda())
+      continue;
+    Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+    LValue VDLVal;
+    if (VD->getType().getCanonicalType()->isReferenceType())
+      VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
+    else
+      VDLVal = CGF.MakeAddrLValue(
+          VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+    llvm::DenseMap Captures;
+    FieldDecl *ThisCapture = nullptr;
+    RD->getCaptureFields(Captures, ThisCapture);
+    if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
+      LValue ThisLVal =
+          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+      llvm::Value *CXXThis = CGF.LoadCXXThis();
+      CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
+    }
+    for (const LambdaCapture &LC : RD->captures()) {
+      if (LC.getCaptureKind() != LCK_ByRef)
+        continue;
+      const VarDecl *VD = LC.getCapturedVar();
+      if (!CS->capturesVariable(VD))
+        continue;
+      auto It = Captures.find(VD);
+      assert(It != Captures.end() && "Found lambda capture without field.");
+      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+      Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+      if (VD->getType().getCanonicalType()->isReferenceType())
+        VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
+                                               VD->getType().getCanonicalType())
+                     .getAddress(CGF);
+      CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal);
+    }
+  }
+}
+
+unsigned CGOpenMPRuntimeGPU::getDefaultFirstprivateAddressSpace() const {
+  return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
+}
+
+bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
+                                                            LangAS &AS) {
+  if (!VD || !VD->hasAttr())
+    return false;
+  const auto *A = VD->getAttr();
+  switch(A->getAllocatorType()) {
+  case OMPAllocateDeclAttr::OMPNullMemAlloc:
+  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+  // Not supported, fallback to the default mem space.
+  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+    AS = LangAS::Default;
+    return true;
+  case OMPAllocateDeclAttr::OMPConstMemAlloc:
+    AS = LangAS::cuda_constant;
+    return true;
+  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    AS = LangAS::cuda_shared;
+    return true;
+  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+    llvm_unreachable("Expected predefined allocator for the variables with the "
+                     "static storage.");
+  }
+  return false;
+}
+
+// Get current CudaArch and ignore any unknown values
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+  if (!CGM.getTarget().hasFeature("ptx"))
+    return CudaArch::UNKNOWN;
+  llvm::StringMap Features;
+  CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+                                 CGM.getTarget().getTargetOpts().CPU,
+                                 CGM.getTarget().getTargetOpts().Features);
+  for (const auto &Feature : Features) {
+    if (Feature.getValue()) {
+      CudaArch Arch = StringToCudaArch(Feature.getKey());
+      if (Arch != CudaArch::UNKNOWN)
+        return Arch;
+    }
+  }
+  return CudaArch::UNKNOWN;
+}
+
+/// Check to see if target architecture supports unified addressing which is
+/// a restriction for OpenMP requires clause "unified_shared_memory".
+void CGOpenMPRuntimeGPU::processRequiresDirective(
+    const OMPRequiresDecl *D) {
+  for (const OMPClause *Clause : D->clauselists()) {
+    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+      CudaArch Arch = getCudaArch(CGM);
+      switch (Arch) {
+      case CudaArch::SM_20:
+      case CudaArch::SM_21:
+      case CudaArch::SM_30:
+      case CudaArch::SM_32:
+      case CudaArch::SM_35:
+      case CudaArch::SM_37:
+      case CudaArch::SM_50:
+      case CudaArch::SM_52:
+      case CudaArch::SM_53:
+      case CudaArch::SM_60:
+      case CudaArch::SM_61:
+      case CudaArch::SM_62: {
+        SmallString<256> Buffer;
+        llvm::raw_svector_ostream Out(Buffer);
+        Out << "Target architecture " << CudaArchToString(Arch)
+            << " does not support unified addressing";
+        CGM.Error(Clause->getBeginLoc(), Out.str());
+        return;
+      }
+      case CudaArch::SM_70:
+      case CudaArch::SM_72:
+      case CudaArch::SM_75:
+      case CudaArch::SM_80:
+      case CudaArch::GFX600:
+      case CudaArch::GFX601:
+      case CudaArch::GFX700:
+      case CudaArch::GFX701:
+      case CudaArch::GFX702:
+      case CudaArch::GFX703:
+      case CudaArch::GFX704:
+      case CudaArch::GFX801:
+      case CudaArch::GFX802:
+      case CudaArch::GFX803:
+      case CudaArch::GFX810:
+      case CudaArch::GFX900:
+      case CudaArch::GFX902:
+      case CudaArch::GFX904:
+      case CudaArch::GFX906:
+      case CudaArch::GFX908:
+      case CudaArch::GFX909:
+      case CudaArch::GFX1010:
+      case CudaArch::GFX1011:
+      case CudaArch::GFX1012:
+      case CudaArch::GFX1030:
+      case CudaArch::UNKNOWN:
+        break;
+      case CudaArch::LAST:
+        llvm_unreachable("Unexpected Cuda arch.");
+      }
+    }
+  }
+  CGOpenMPRuntime::processRequiresDirective(D);
+}
+
+/// Get number of SMs and number of blocks per SM.
+static std::pair getSMsBlocksPerSM(CodeGenModule &CGM) {
+  std::pair Data;
+  if (CGM.getLangOpts().OpenMPCUDANumSMs)
+    Data.first = CGM.getLangOpts().OpenMPCUDANumSMs;
+  if (CGM.getLangOpts().OpenMPCUDABlocksPerSM)
+    Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
+  if (Data.first && Data.second)
+    return Data;
+  switch (getCudaArch(CGM)) {
+  case CudaArch::SM_20:
+  case CudaArch::SM_21:
+  case CudaArch::SM_30:
+  case CudaArch::SM_32:
+  case CudaArch::SM_35:
+  case CudaArch::SM_37:
+  case CudaArch::SM_50:
+  case CudaArch::SM_52:
+  case CudaArch::SM_53:
+    return {16, 16};
+  case CudaArch::SM_60:
+  case CudaArch::SM_61:
+  case CudaArch::SM_62:
+    return {56, 32};
+  case CudaArch::SM_70:
+  case CudaArch::SM_72:
+  case CudaArch::SM_75:
+  case CudaArch::SM_80:
+    return {84, 32};
+  case CudaArch::GFX600:
+  case CudaArch::GFX601:
+  case CudaArch::GFX700:
+  case CudaArch::GFX701:
+  case CudaArch::GFX702:
+  case CudaArch::GFX703:
+  case CudaArch::GFX704:
+  case CudaArch::GFX801:
+  case CudaArch::GFX802:
+  case CudaArch::GFX803:
+  case CudaArch::GFX810:
+  case CudaArch::GFX900:
+  case CudaArch::GFX902:
+  case CudaArch::GFX904:
+  case CudaArch::GFX906:
+  case CudaArch::GFX908:
+  case CudaArch::GFX909:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
+  case CudaArch::GFX1030:
+  case CudaArch::UNKNOWN:
+    break;
+  case CudaArch::LAST:
+    llvm_unreachable("Unexpected Cuda arch.");
+  }
+  llvm_unreachable("Unexpected NVPTX target without ptx feature.");
+}
+
+void CGOpenMPRuntimeGPU::clear() {
+  if (!GlobalizedRecords.empty() &&
+      !CGM.getLangOpts().OpenMPCUDATargetParallel) {
+    ASTContext &C = CGM.getContext();
+    llvm::SmallVector GlobalRecs;
+    llvm::SmallVector SharedRecs;
+    RecordDecl *StaticRD = C.buildImplicitRecord(
+        "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+    StaticRD->startDefinition();
+    RecordDecl *SharedStaticRD = C.buildImplicitRecord(
+        "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+    SharedStaticRD->startDefinition();
+    for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
+      if (Records.Records.empty())
+        continue;
+      unsigned Size = 0;
+      unsigned RecAlignment = 0;
+      for (const RecordDecl *RD : Records.Records) {
+        QualType RDTy = C.getRecordType(RD);
+        unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity();
+        RecAlignment = std::max(RecAlignment, Alignment);
+        unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity();
+        Size =
+            llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
+      }
+      Size = llvm::alignTo(Size, RecAlignment);
+      llvm::APInt ArySize(/*numBits=*/64, Size);
+      QualType SubTy = C.getConstantArrayType(
+          C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
+      const bool UseSharedMemory = Size <= SharedMemorySize;
+      auto *Field =
+          FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
+                            SourceLocation(), SourceLocation(), nullptr, SubTy,
+                            C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+                            /*BW=*/nullptr, /*Mutable=*/false,
+                            /*InitStyle=*/ICIS_NoInit);
+      Field->setAccess(AS_public);
+      if (UseSharedMemory) {
+        SharedStaticRD->addDecl(Field);
+        SharedRecs.push_back(&Records);
+      } else {
+        StaticRD->addDecl(Field);
+        GlobalRecs.push_back(&Records);
+      }
+      Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size));
+      Records.UseSharedMemory->setInitializer(
+          llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
+    }
+    // Allocate SharedMemorySize buffer for the shared memory.
+    // FIXME: nvlink does not handle weak linkage correctly (objects with
+    // different sizes are reported as erroneous).
+    // Restore this code as soon as nvlink is fixed.
+    if (!SharedStaticRD->field_empty()) {
+      llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize);
+      QualType SubTy = C.getConstantArrayType(
+          C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
+      auto *Field = FieldDecl::Create(
+          C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy,
+          C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+          /*BW=*/nullptr, /*Mutable=*/false,
+          /*InitStyle=*/ICIS_NoInit);
+      Field->setAccess(AS_public);
+      SharedStaticRD->addDecl(Field);
+    }
+    SharedStaticRD->completeDefinition();
+    if (!SharedStaticRD->field_empty()) {
+      QualType StaticTy = C.getRecordType(SharedStaticRD);
+      llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), LLVMStaticTy,
+          /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
+          llvm::Constant::getNullValue(LLVMStaticTy),
+          "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
+          llvm::GlobalValue::NotThreadLocal,
+          C.getTargetAddressSpace(LangAS::cuda_shared));
+      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GV, CGM.VoidPtrTy);
+      for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
+        Rec->Buffer->replaceAllUsesWith(Replacement);
+        Rec->Buffer->eraseFromParent();
+      }
+    }
+    StaticRD->completeDefinition();
+    if (!StaticRD->field_empty()) {
+      QualType StaticTy = C.getRecordType(StaticRD);
+      std::pair SMsBlockPerSM = getSMsBlocksPerSM(CGM);
+      llvm::APInt Size1(32, SMsBlockPerSM.second);
+      QualType Arr1Ty =
+          C.getConstantArrayType(StaticTy, Size1, nullptr, ArrayType::Normal,
+                                 /*IndexTypeQuals=*/0);
+      llvm::APInt Size2(32, SMsBlockPerSM.first);
+      QualType Arr2Ty =
+          C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal,
+                                 /*IndexTypeQuals=*/0);
+      llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
+      // FIXME: nvlink does not handle weak linkage correctly (objects with
+      // different sizes are reported as erroneous).
+      // Restore CommonLinkage as soon as nvlink is fixed.
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), LLVMArr2Ty,
+          /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
+          llvm::Constant::getNullValue(LLVMArr2Ty),
+          "_openmp_static_glob_rd_$_");
+      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GV, CGM.VoidPtrTy);
+      for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
+        Rec->Buffer->replaceAllUsesWith(Replacement);
+        Rec->Buffer->eraseFromParent();
+      }
+    }
+  }
+  if (!TeamsReductions.empty()) {
+    ASTContext &C = CGM.getContext();
+    RecordDecl *StaticRD = C.buildImplicitRecord(
+        "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union);
+    StaticRD->startDefinition();
+    for (const RecordDecl *TeamReductionRec : TeamsReductions) {
+      QualType RecTy = C.getRecordType(TeamReductionRec);
+      auto *Field = FieldDecl::Create(
+          C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy,
+          C.getTrivialTypeSourceInfo(RecTy, SourceLocation()),
+          /*BW=*/nullptr, /*Mutable=*/false,
+          /*InitStyle=*/ICIS_NoInit);
+      Field->setAccess(AS_public);
+      StaticRD->addDecl(Field);
+    }
+    StaticRD->completeDefinition();
+    QualType StaticTy = C.getRecordType(StaticRD);
+    llvm::Type *LLVMReductionsBufferTy =
+        CGM.getTypes().ConvertTypeForMem(StaticTy);
+    // FIXME: nvlink does not handle weak linkage correctly (objects with
+    // different sizes are reported as erroneous).
+    // Restore CommonLinkage as soon as nvlink is fixed.
+    auto *GV = new llvm::GlobalVariable(
+        CGM.getModule(), LLVMReductionsBufferTy,
+        /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
+        llvm::Constant::getNullValue(LLVMReductionsBufferTy),
+        "_openmp_teams_reductions_buffer_$_");
+    KernelTeamsReductionPtr->setInitializer(
+        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
+                                                             CGM.VoidPtrTy));
+  }
+  CGOpenMPRuntime::clear();
+}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
new file mode 100644
index 0000000000000..316333072c5bc
--- /dev/null
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -0,0 +1,495 @@
+//===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a generalized class for OpenMP runtime code generation
+// specialized by GPU target NVPTX.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
+#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
+
+#include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
+
+namespace clang {
+namespace CodeGen {
+
+class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
+public:
+  /// Defines the execution mode.
+  enum ExecutionMode {
+    /// SPMD execution mode (all threads are worker threads).
+    EM_SPMD,
+    /// Non-SPMD execution mode (1 master thread, others are workers).
+    EM_NonSPMD,
+    /// Unknown execution mode (orphaned directive).
+    EM_Unknown,
+  };
+private:
+  /// Parallel outlined function work for workers to execute.
+  llvm::SmallVector Work;
+
+  struct EntryFunctionState {
+    llvm::BasicBlock *ExitBB = nullptr;
+  };
+
+  class WorkerFunctionState {
+  public:
+    llvm::Function *WorkerFn;
+    const CGFunctionInfo &CGFI;
+    SourceLocation Loc;
+
+    WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
+
+  private:
+    void createWorkerFunction(CodeGenModule &CGM);
+  };
+
+  ExecutionMode getExecutionMode() const;
+
+  bool requiresFullRuntime() const { return RequiresFullRuntime; }
+
+  /// Get barrier to synchronize all threads in a block.
+  void syncCTAThreads(CodeGenFunction &CGF);
+
+  /// Emit the worker function for the current target region.
+  void emitWorkerFunction(WorkerFunctionState &WST);
+
+  /// Helper for worker function. Emit body of worker loop.
+  void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
+
+  /// Helper for non-SPMD target entry function. Guide the master and
+  /// worker threads to their respective locations.
+  void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+                              WorkerFunctionState &WST);
+
+  /// Signal termination of OMP execution for non-SPMD target entry
+  /// function.
+  void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
+  /// Helper for generic variables globalization prolog.
+  void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
+                             bool WithSPMDCheck = false);
+
+  /// Helper for generic variables globalization epilog.
+  void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
+
+  /// Helper for SPMD mode target directive's entry function.
+  void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+                           const OMPExecutableDirective &D);
+
+  /// Signal termination of SPMD mode execution.
+  void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
+  //
+  // Base class overrides.
+  //
+
+  /// Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr, size \a Size, and flags \a Flags.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+                          uint64_t Size, int32_t Flags,
+                          llvm::GlobalValue::LinkageTypes Linkage) override;
+
+  /// Emit outlined function specialized for the Fork-Join
+  /// programming model for applicable target directives on the NVPTX device.
+  /// \param D Directive to emit.
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// An outlined function may not be an entry if, e.g. the if clause always
+  /// evaluates to false.
+  void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
+                         llvm::Function *&OutlinedFn,
+                         llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+                         const RegionCodeGenTy &CodeGen);
+
+  /// Emit outlined function specialized for the Single Program
+  /// Multiple Data programming model for applicable target directives on the
+  /// NVPTX device.
+  /// \param D Directive to emit.
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// \param CodeGen Object containing the target statements.
+  /// An outlined function may not be an entry if, e.g. the if clause always
+  /// evaluates to false.
+  void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
+                      llvm::Function *&OutlinedFn,
+                      llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+                      const RegionCodeGenTy &CodeGen);
+
+  /// Emit outlined function for 'target' directive on the NVPTX
+  /// device.
+  /// \param D Directive to emit.
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// An outlined function may not be an entry if, e.g. the if clause always
+  /// evaluates to false.
+  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+                                  StringRef ParentName,
+                                  llvm::Function *&OutlinedFn,
+                                  llvm::Constant *&OutlinedFnID,
+                                  bool IsOffloadEntry,
+                                  const RegionCodeGenTy &CodeGen) override;
+
+  /// Emits code for parallel or serial call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// This call is for the Non-SPMD Execution Mode.
+  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  /// \param IfCond Condition in the associated 'if' clause, if it was
+  /// specified, nullptr otherwise.
+  void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                               llvm::Value *OutlinedFn,
+                               ArrayRef<llvm::Value *> CapturedVars,
+                               const Expr *IfCond);
+
+  /// Emits code for parallel or serial call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// This call is for a parallel directive within an SPMD target directive.
+  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  /// \param IfCond Condition in the associated 'if' clause, if it was
+  /// specified, nullptr otherwise.
+  ///
+  void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                            llvm::Function *OutlinedFn,
+                            ArrayRef<llvm::Value *> CapturedVars,
+                            const Expr *IfCond);
+
+protected:
+  /// Get the function name of an outlined region.
+  //  The name can be customized depending on the target.
+  //
+  StringRef getOutlinedHelperName() const override {
+    return "__omp_outlined__";
+  }
+
+  /// Check if the default location must be constant.
+  /// Constant for NVPTX for better optimization.
+  bool isDefaultLocationConstant() const override { return true; }
+
+  /// Returns additional flags that can be stored in reserved_2 field of the
+  /// default location.
+  /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
+  /// Full/Lightweight runtime mode. Used for better optimization.
+  unsigned getDefaultLocationReserved2Flags() const override;
+
+public:
+  explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
+  void clear() override;
+
+  /// Declare generalized virtual functions which need to be defined
+  /// by all specializations of OpenMPGPURuntime Targets.
+  virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0;
+
+  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+  virtual void emitProcBindClause(CodeGenFunction &CGF,
+                                  llvm::omp::ProcBindKind ProcBind,
+                                  SourceLocation Loc) override;
+
+  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+                                    llvm::Value *NumThreads,
+                                    SourceLocation Loc) override;
+
+  /// This function ought to emit, in the general case, a call to
+  // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
+  // as these numbers are obtained through the PTX grid and block configuration.
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+                          const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// Emits inlined function for the specified OpenMP parallel
+  //  directive.
+  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  llvm::Function *
+  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+                               const VarDecl *ThreadIDVar,
+                               OpenMPDirectiveKind InnermostKind,
+                               const RegionCodeGenTy &CodeGen) override;
+
+  /// Emits inlined function for the specified OpenMP teams
+  //  directive.
+  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  llvm::Function *
+  emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
+                            const VarDecl *ThreadIDVar,
+                            OpenMPDirectiveKind InnermostKind,
+                            const RegionCodeGenTy &CodeGen) override;
+
+  /// Emits code for teams call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run by team masters. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  ///
+  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                     SourceLocation Loc, llvm::Function *OutlinedFn,
+                     ArrayRef<llvm::Value *> CapturedVars) override;
+
+  /// Emits code for parallel or serial call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  /// \param IfCond Condition in the associated 'if' clause, if it was
+  /// specified, nullptr otherwise.
+  void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                        llvm::Function *OutlinedFn,
+                        ArrayRef<llvm::Value *> CapturedVars,
+                        const Expr *IfCond) override;
+
+  /// Emit an implicit/explicit barrier for OpenMP threads.
+  /// \param Kind Directive for which this implicit barrier call must be
+  /// generated. Must be OMPD_barrier for explicit barrier generation.
+  /// \param EmitChecks true if need to emit checks for cancellation barriers.
+  /// \param ForceSimpleCall true simple barrier call must be emitted, false if
+  /// runtime class decides which one to emit (simple or with cancellation
+  /// checks).
+  ///
+  void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+                       OpenMPDirectiveKind Kind, bool EmitChecks = true,
+                       bool ForceSimpleCall = false) override;
+
+  /// Emits a critical region.
+  /// \param CriticalName Name of the critical region.
+  /// \param CriticalOpGen Generator for the statement associated with the given
+  /// critical region.
+  /// \param Hint Value of the 'hint' clause (optional).
+  void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
+                          const RegionCodeGenTy &CriticalOpGen,
+                          SourceLocation Loc,
+                          const Expr *Hint = nullptr) override;
+
+  /// Emit a code for reduction clause.
+  ///
+  /// \param Privates List of private copies for original reduction arguments.
+  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+  /// or 'operator binop(LHS, RHS)'.
+  /// \param Options List of options for reduction codegen:
+  ///     WithNowait true if parent directive has also nowait clause, false
+  ///     otherwise.
+  ///     SimpleReduction Emit reduction operation only. Used for omp simd
+  ///     directive on the host.
+  ///     ReductionKind The kind of reduction to perform.
+  virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+                             ArrayRef<const Expr *> Privates,
+                             ArrayRef<const Expr *> LHSExprs,
+                             ArrayRef<const Expr *> RHSExprs,
+                             ArrayRef<const Expr *> ReductionOps,
+                             ReductionOptionsTy Options) override;
+
+  /// Returns specified OpenMP runtime function for the current OpenMP
+  /// implementation.  Specialized for the NVPTX device.
+  /// \param Function OpenMP runtime function.
+  /// \return Specified function.
+  llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
+
+  /// Translates the native parameter of outlined function if this is required
+  /// for target.
+  /// \param FD Field decl from captured record for the parameter.
+  /// \param NativeParam Parameter itself.
+  const VarDecl *translateParameter(const FieldDecl *FD,
+                                    const VarDecl *NativeParam) const override;
+
+  /// Gets the address of the native argument basing on the address of the
+  /// target-specific parameter.
+  /// \param NativeParam Parameter itself.
+  /// \param TargetParam Corresponding target-specific parameter.
+  Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
+                              const VarDecl *TargetParam) const override;
+
+  /// Emits call of the outlined function with the provided arguments,
+  /// translating these arguments to correct target-specific arguments.
+  void emitOutlinedFunctionCall(
+      CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
+      ArrayRef<llvm::Value *> Args = llvm::None) const override;
+
+  /// Emits OpenMP-specific function prolog.
+  /// Required for device constructs.
+  void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
+
+  /// Gets the OpenMP-specific address of the local variable.
+  Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+                                    const VarDecl *VD) override;
+
+  /// Target codegen is specialized based on two data-sharing modes: CUDA, in
+  /// which the local variables are actually global threadlocal, and Generic, in
+  /// which the local variables are placed in global memory if they may escape
+  /// their declaration context.
+  enum DataSharingMode {
+    /// CUDA data sharing mode.
+    CUDA,
+    /// Generic data-sharing mode.
+    Generic,
+  };
+
+  /// Cleans up references to the objects in finished function.
+  ///
+  void functionFinished(CodeGenFunction &CGF) override;
+
+  /// Choose a default value for the dist_schedule clause.
+  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const override;
+
+  /// Choose a default value for the schedule clause.
+  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
+      const Expr *&ChunkExpr) const override;
+
+  /// Adjust some parameters for the target-based directives, like addresses of
+  /// the variables captured by reference in lambdas.
+  void adjustTargetSpecificDataForLambdas(
+      CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
+
+  /// Perform check on requires decl to ensure that target architecture
+  /// supports unified addressing
+  void processRequiresDirective(const OMPRequiresDecl *D) override;
+
+  /// Returns default address space for the constant firstprivates, __constant__
+  /// address space by default.
+  unsigned getDefaultFirstprivateAddressSpace() const override;
+
+  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
+  /// the predefined allocator and translates it into the corresponding address
+  /// space.
+  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
+
+private:
+  /// Track the execution mode when codegening directives within a target
+  /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
+  /// target region and used by containing directives such as 'parallel'
+  /// to emit optimized code.
+  ExecutionMode CurrentExecutionMode = EM_Unknown;
+
+  /// Check if the full runtime is required (default - yes).
+  bool RequiresFullRuntime = true;
+
+  /// true if we're emitting the code for the target region and next parallel
+  /// region is L0 for sure.
+  bool IsInTargetMasterThreadRegion = false;
+  /// true if currently emitting code for target/teams/distribute region, false
+  /// - otherwise.
+  bool IsInTTDRegion = false;
+  /// true if we're definitely in the parallel region.
+  bool IsInParallelRegion = false;
+
+  /// Map between an outlined function and its wrapper.
+  llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
+
+  /// Emit function which wraps the outline parallel region
+  /// and controls the parameters which are passed to this function.
+  /// The wrapper ensures that the outlined function is called
+  /// with the correct arguments when data is shared.
+  llvm::Function *createParallelDataSharingWrapper(
+      llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
+
+  /// The data for the single globalized variable.
+  struct MappedVarData {
+    /// Corresponding field in the global record.
+    const FieldDecl *FD = nullptr;
+    /// Corresponding address.
+    Address PrivateAddr = Address::invalid();
+    /// true, if only one element is required (for latprivates in SPMD mode),
+    /// false, if need to create based on the warp-size.
+    bool IsOnePerTeam = false;
+    MappedVarData() = delete;
+    MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false)
+        : FD(FD), IsOnePerTeam(IsOnePerTeam) {}
+  };
+  /// The map of local variables to their addresses in the global memory.
+  using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
+  /// Set of the parameters passed by value escaping OpenMP context.
+  using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
+  struct FunctionData {
+    DeclToAddrMapTy LocalVarData;
+    llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
+    EscapedParamsTy EscapedParameters;
+    llvm::SmallVector<const ValueDecl *, 4> EscapedVariableLengthDecls;
+    llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
+    const RecordDecl *GlobalRecord = nullptr;
+    llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;
+    llvm::Value *GlobalRecordAddr = nullptr;
+    llvm::Value *IsInSPMDModeFlag = nullptr;
+    std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
+  };
+  /// Maps the function to the list of the globalized variables with their
+  /// addresses.
+  llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
+  /// List of records for the globalized variables in target/teams/distribute
+  /// contexts. Inner records are going to be joined into the single record,
+  /// while those resulting records are going to be joined into the single
+  /// union. This resulting union (one per CU) is the entry point for the static
+  /// memory management runtime functions.
+  struct GlobalPtrSizeRecsTy {
+    llvm::GlobalVariable *UseSharedMemory = nullptr;
+    llvm::GlobalVariable *RecSize = nullptr;
+    llvm::GlobalVariable *Buffer = nullptr;
+    SourceLocation Loc;
+    llvm::SmallVector<const RecordDecl *, 2> Records;
+    unsigned RegionCounter = 0;
+  };
+  llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords;
+  llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
+  /// List of the records with the list of fields for the reductions across the
+  /// teams. Used to build the intermediate buffer for the fast teams
+  /// reductions.
+  /// All the records are gathered into a union `union.type` is created.
+  llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
+  /// Shared pointer for the global memory in the global memory buffer used for
+  /// the given kernel.
+  llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
+  /// Pair of the Non-SPMD team and all reductions variables in this team
+  /// region.
+  std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
+      TeamAndReductions;
+};
+
+} // CodeGen namespace.
+} // clang namespace.
+
+#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index cabd06bd76e84..5fefc95ee4130 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -7,11 +7,12 @@
 //===----------------------------------------------------------------------===//
 //
 // This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets.
+// targets from generalized CGOpenMPRuntimeGPU class.
 //
 //===----------------------------------------------------------------------===//
 
 #include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeGPU.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclOpenMP.h"
@@ -25,5227 +26,16 @@ using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
 
-namespace {
-enum OpenMPRTLFunctionNVPTX {
-  /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
-  /// int16_t RequiresOMPRuntime);
-  OMPRTL_NVPTX__kmpc_kernel_init,
-  /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
-  OMPRTL_NVPTX__kmpc_kernel_deinit,
-  /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
-  /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
-  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
-  /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
-  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
-  /// Call to void __kmpc_kernel_prepare_parallel(void
-  /// *outlined_function, int16_t
-  /// IsOMPRuntimeInitialized);
-  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
-  /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
-  /// int16_t IsOMPRuntimeInitialized);
-  OMPRTL_NVPTX__kmpc_kernel_parallel,
-  /// Call to void __kmpc_kernel_end_parallel();
-  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
-  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL_NVPTX__kmpc_serialized_parallel,
-  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
-  /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
-  /// int16_t lane_offset, int16_t warp_size);
-  OMPRTL_NVPTX__kmpc_shuffle_int32,
-  /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
-  /// int16_t lane_offset, int16_t warp_size);
-  OMPRTL_NVPTX__kmpc_shuffle_int64,
-  /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
-  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
-  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
-  /// lane_offset, int16_t shortCircuit),
-  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
-  OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
-  /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
-  /// global_tid, void *global_buffer, int32_t num_of_records, void*
-  /// reduce_data,
-  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
-  /// lane_offset, int16_t shortCircuit),
-  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
-  /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
-  /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
-  /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
-  /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
-  /// *buffer, int idx, void *reduce_data));
-  OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
-  /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
-  OMPRTL_NVPTX__kmpc_end_reduce_nowait,
-  /// Call to void __kmpc_data_sharing_init_stack();
-  OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
-  /// Call to void __kmpc_data_sharing_init_stack_spmd();
-  OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
-  /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
-  /// int16_t UseSharedMemory);
-  OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
-  /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
-  /// UseSharedMemory);
-  OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
-  /// Call to void __kmpc_data_sharing_pop_stack(void *a);
-  OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
-  /// Call to void __kmpc_begin_sharing_variables(void ***args,
-  /// size_t n_args);
-  OMPRTL_NVPTX__kmpc_begin_sharing_variables,
-  /// Call to void __kmpc_end_sharing_variables();
-  OMPRTL_NVPTX__kmpc_end_sharing_variables,
-  /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
-  OMPRTL_NVPTX__kmpc_get_shared_variables,
-  /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL_NVPTX__kmpc_parallel_level,
-  /// Call to int8_t __kmpc_is_spmd_exec_mode();
-  OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
-  /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
-  /// const void *buf, size_t size, int16_t is_shared, const void **res);
-  OMPRTL_NVPTX__kmpc_get_team_static_memory,
-  /// Call to void __kmpc_restore_team_static_memory(int16_t
-  /// isSPMDExecutionMode, int16_t is_shared);
-  OMPRTL_NVPTX__kmpc_restore_team_static_memory,
-  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
-  OMPRTL__kmpc_barrier,
-  /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL__kmpc_barrier_simple_spmd,
-  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
-  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
-  /// Call to void __kmpc_syncwarp(int32_t Mask);
-  OMPRTL_NVPTX__kmpc_syncwarp,
-};
-
-/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
-class NVPTXActionTy final : public PrePostActionTy {
-  llvm::FunctionCallee EnterCallee = nullptr;
-  ArrayRef<llvm::Value *> EnterArgs;
-  llvm::FunctionCallee ExitCallee = nullptr;
-  ArrayRef<llvm::Value *> ExitArgs;
-  bool Conditional = false;
-  llvm::BasicBlock *ContBlock = nullptr;
-
-public:
-  NVPTXActionTy(llvm::FunctionCallee EnterCallee,
-                ArrayRef<llvm::Value *> EnterArgs,
-                llvm::FunctionCallee ExitCallee,
-                ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
-      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
-        ExitArgs(ExitArgs), Conditional(Conditional) {}
-  void Enter(CodeGenFunction &CGF) override {
-    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
-    if (Conditional) {
-      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
-      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
-      ContBlock = CGF.createBasicBlock("omp_if.end");
-      // Generate the branch (If-stmt)
-      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
-      CGF.EmitBlock(ThenBlock);
-    }
-  }
-  void Done(CodeGenFunction &CGF) {
-    // Emit the rest of blocks/branches
-    CGF.EmitBranch(ContBlock);
-    CGF.EmitBlock(ContBlock, true);
-  }
-  void Exit(CodeGenFunction &CGF) override {
-    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
-  }
-};
-
-/// A class to track the execution mode when codegening directives within
-/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry
-/// to the target region and used by containing directives such as 'parallel'
-/// to emit optimized code.
-class ExecutionRuntimeModesRAII {
-private:
-  CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode =
-      CGOpenMPRuntimeNVPTX::EM_Unknown;
-  CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode;
-  bool SavedRuntimeMode = false;
-  bool *RuntimeMode = nullptr;
-
-public:
-  /// Constructor for Non-SPMD mode.
-  ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode)
-      : ExecMode(ExecMode) {
-    SavedExecMode = ExecMode;
-    ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD;
-  }
-  /// Constructor for SPMD mode.
-  ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode,
-                            bool &RuntimeMode, bool FullRuntimeMode)
-      : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
-    SavedExecMode = ExecMode;
-    SavedRuntimeMode = RuntimeMode;
-    ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD;
-    RuntimeMode = FullRuntimeMode;
-  }
-  ~ExecutionRuntimeModesRAII() {
-    ExecMode = SavedExecMode;
-    if (RuntimeMode)
-      *RuntimeMode = SavedRuntimeMode;
-  }
-};
-
-/// GPU Configuration:  This information can be derived from cuda registers,
-/// however, providing compile time constants helps generate more efficient
-/// code.  For all practical purposes this is fine because the configuration
-/// is the same for all known NVPTX architectures.
-enum MachineConfiguration : unsigned {
-  WarpSize = 32,
-  /// Number of bits required to represent a lane identifier, which is
-  /// computed as log_2(WarpSize).
-  LaneIDBits = 5,
-  LaneIDMask = WarpSize - 1,
-
-  /// Global memory alignment for performance.
-  GlobalMemoryAlignment = 128,
-
-  /// Maximal size of the shared memory buffer.
-  SharedMemorySize = 128,
-};
-
-static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
-  RefExpr = RefExpr->IgnoreParens();
-  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
-    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-      Base = TempASE->getBase()->IgnoreParenImpCasts();
-    RefExpr = Base;
-  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
-    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
-      Base = TempOASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-      Base = TempASE->getBase()->IgnoreParenImpCasts();
-    RefExpr = Base;
-  }
-  RefExpr = RefExpr->IgnoreParenImpCasts();
-  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
-    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
-  const auto *ME = cast<MemberExpr>(RefExpr);
-  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
-}
-
-
-static RecordDecl *buildRecordForGlobalizedVars(
-    ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
-    ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
-    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-        &MappedDeclsFields, int BufSize) {
-  using VarsDataTy = std::pair<CharUnits, const ValueDecl *>;
-  if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
-    return nullptr;
-  SmallVector<VarsDataTy, 4> GlobalizedVars;
-  for (const ValueDecl *D : EscapedDecls)
-    GlobalizedVars.emplace_back(
-        CharUnits::fromQuantity(std::max(
-            C.getDeclAlign(D).getQuantity(),
-            static_cast(GlobalMemoryAlignment))),
-        D);
-  for (const ValueDecl *D : EscapedDeclsForTeams)
-    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
-  llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
-    return L.first > R.first;
-  });
-
-  // Build struct _globalized_locals_ty {
-  //         /*  globalized vars  */[WarSize] align (max(decl_align,
-  //         GlobalMemoryAlignment))
-  //         /*  globalized vars  */ for EscapedDeclsForTeams
-  //       };
-  RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
-  GlobalizedRD->startDefinition();
-  llvm::SmallPtrSet SingleEscaped(
-      EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
-  for (const auto &Pair : GlobalizedVars) {
-    const ValueDecl *VD = Pair.second;
-    QualType Type = VD->getType();
-    if (Type->isLValueReferenceType())
-      Type = C.getPointerType(Type.getNonReferenceType());
-    else
-      Type = Type.getNonReferenceType();
-    SourceLocation Loc = VD->getLocation();
-    FieldDecl *Field;
-    if (SingleEscaped.count(VD)) {
-      Field = FieldDecl::Create(
-          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
-          C.getTrivialTypeSourceInfo(Type, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
-      Field->setAccess(AS_public);
-      if (VD->hasAttrs()) {
-        for (specific_attr_iterator I(VD->getAttrs().begin()),
-             E(VD->getAttrs().end());
-             I != E; ++I)
-          Field->addAttr(*I);
-      }
-    } else {
-      llvm::APInt ArraySize(32, BufSize);
-      Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal,
-                                    0);
-      Field = FieldDecl::Create(
-          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
-          C.getTrivialTypeSourceInfo(Type, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
-      Field->setAccess(AS_public);
-      llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
-                                     static_cast(
-                                         GlobalMemoryAlignment)));
-      Field->addAttr(AlignedAttr::CreateImplicit(
-          C, /*IsAlignmentExpr=*/true,
-          IntegerLiteral::Create(C, Align,
-                                 C.getIntTypeForBitwidth(32, /*Signed=*/0),
-                                 SourceLocation()),
-          {}, AttributeCommonInfo::AS_GNU, AlignedAttr::GNU_aligned));
-    }
-    GlobalizedRD->addDecl(Field);
-    MappedDeclsFields.try_emplace(VD, Field);
-  }
-  GlobalizedRD->completeDefinition();
-  return GlobalizedRD;
+CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
+    : CGOpenMPRuntimeGPU(CGM) {
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    llvm_unreachable("OpenMP NVPTX can only handle device code.");
 }
 
-/// Get the list of variables that can escape their declaration context.
-class CheckVarsEscapingDeclContext final
-    : public ConstStmtVisitor {
-  CodeGenFunction &CGF;
-  llvm::SetVector EscapedDecls;
-  llvm::SetVector EscapedVariableLengthDecls;
-  llvm::SmallPtrSet EscapedParameters;
-  RecordDecl *GlobalizedRD = nullptr;
-  llvm::SmallDenseMap MappedDeclsFields;
-  bool AllEscaped = false;
-  bool IsForCombinedParallelRegion = false;
-
-  void markAsEscaped(const ValueDecl *VD) {
-    // Do not globalize declare target variables.
-    if (!isa(VD) ||
-        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
-      return;
-    VD = cast(VD->getCanonicalDecl());
-    // Use user-specified allocation.
-    if (VD->hasAttrs() && VD->hasAttr())
-      return;
-    // Variables captured by value must be globalized.
-    if (auto *CSI = CGF.CapturedStmtInfo) {
-      if (const FieldDecl *FD = CSI->lookup(cast(VD))) {
-        // Check if need to capture the variable that was already captured by
-        // value in the outer region.
-        if (!IsForCombinedParallelRegion) {
-          if (!FD->hasAttrs())
-            return;
-          const auto *Attr = FD->getAttr();
-          if (!Attr)
-            return;
-          if (((Attr->getCaptureKind() != OMPC_map) &&
-               !isOpenMPPrivate(Attr->getCaptureKind())) ||
-              ((Attr->getCaptureKind() == OMPC_map) &&
-               !FD->getType()->isAnyPointerType()))
-            return;
-        }
-        if (!FD->getType()->isReferenceType()) {
-          assert(!VD->getType()->isVariablyModifiedType() &&
-                 "Parameter captured by value with variably modified type");
-          EscapedParameters.insert(VD);
-        } else if (!IsForCombinedParallelRegion) {
-          return;
-        }
-      }
-    }
-    if ((!CGF.CapturedStmtInfo ||
-         (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
-        VD->getType()->isReferenceType())
-      // Do not globalize variables with reference type.
-      return;
-    if (VD->getType()->isVariablyModifiedType())
-      EscapedVariableLengthDecls.insert(VD);
-    else
-      EscapedDecls.insert(VD);
-  }
-
-  void VisitValueDecl(const ValueDecl *VD) {
-    if (VD->getType()->isLValueReferenceType())
-      markAsEscaped(VD);
-    if (const auto *VarD = dyn_cast(VD)) {
-      if (!isa(VarD) && VarD->hasInit()) {
-        const bool SavedAllEscaped = AllEscaped;
-        AllEscaped = VD->getType()->isLValueReferenceType();
-        Visit(VarD->getInit());
-        AllEscaped = SavedAllEscaped;
-      }
-    }
-  }
-  void VisitOpenMPCapturedStmt(const CapturedStmt *S,
-                               ArrayRef Clauses,
-                               bool IsCombinedParallelRegion) {
-    if (!S)
-      return;
-    for (const CapturedStmt::Capture &C : S->captures()) {
-      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
-        const ValueDecl *VD = C.getCapturedVar();
-        bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
-        if (IsCombinedParallelRegion) {
-          // Check if the variable is privatized in the combined construct and
-          // those private copies must be shared in the inner parallel
-          // directive.
-          IsForCombinedParallelRegion = false;
-          for (const OMPClause *C : Clauses) {
-            if (!isOpenMPPrivate(C->getClauseKind()) ||
-                C->getClauseKind() == OMPC_reduction ||
-                C->getClauseKind() == OMPC_linear ||
-                C->getClauseKind() == OMPC_private)
-              continue;
-            ArrayRef Vars;
-            if (const auto *PC = dyn_cast(C))
-              Vars = PC->getVarRefs();
-            else if (const auto *PC = dyn_cast(C))
-              Vars = PC->getVarRefs();
-            else
-              llvm_unreachable("Unexpected clause.");
-            for (const auto *E : Vars) {
-              const Decl *D =
-                  cast(E)->getDecl()->getCanonicalDecl();
-              if (D == VD->getCanonicalDecl()) {
-                IsForCombinedParallelRegion = true;
-                break;
-              }
-            }
-            if (IsForCombinedParallelRegion)
-              break;
-          }
-        }
-        markAsEscaped(VD);
-        if (isa(VD))
-          VisitValueDecl(VD);
-        IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
-      }
-    }
-  }
-
-  void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
-    assert(!GlobalizedRD &&
-           "Record for globalized variables is built already.");
-    ArrayRef EscapedDeclsForParallel, EscapedDeclsForTeams;
-    if (IsInTTDRegion)
-      EscapedDeclsForTeams = EscapedDecls.getArrayRef();
-    else
-      EscapedDeclsForParallel = EscapedDecls.getArrayRef();
-    GlobalizedRD = ::buildRecordForGlobalizedVars(
-        CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
-        MappedDeclsFields, WarpSize);
-  }
-
-public:
-  CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
-                               ArrayRef TeamsReductions)
-      : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
-  }
-  virtual ~CheckVarsEscapingDeclContext() = default;
-  void VisitDeclStmt(const DeclStmt *S) {
-    if (!S)
-      return;
-    for (const Decl *D : S->decls())
-      if (const auto *VD = dyn_cast_or_null(D))
-        VisitValueDecl(VD);
-  }
-  void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
-    if (!D)
-      return;
-    if (!D->hasAssociatedStmt())
-      return;
-    if (const auto *S =
-            dyn_cast_or_null(D->getAssociatedStmt())) {
-      // Do not analyze directives that do not actually require capturing,
-      // like `omp for` or `omp simd` directives.
-      llvm::SmallVector CaptureRegions;
-      getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());
-      if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
-        VisitStmt(S->getCapturedStmt());
-        return;
-      }
-      VisitOpenMPCapturedStmt(
-          S, D->clauses(),
-          CaptureRegions.back() == OMPD_parallel &&
-              isOpenMPDistributeDirective(D->getDirectiveKind()));
-    }
-  }
-  void VisitCapturedStmt(const CapturedStmt *S) {
-    if (!S)
-      return;
-    for (const CapturedStmt::Capture &C : S->captures()) {
-      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
-        const ValueDecl *VD = C.getCapturedVar();
-        markAsEscaped(VD);
-        if (isa(VD))
-          VisitValueDecl(VD);
-      }
-    }
-  }
-  void VisitLambdaExpr(const LambdaExpr *E) {
-    if (!E)
-      return;
-    for (const LambdaCapture &C : E->captures()) {
-      if (C.capturesVariable()) {
-        if (C.getCaptureKind() == LCK_ByRef) {
-          const ValueDecl *VD = C.getCapturedVar();
-          markAsEscaped(VD);
-          if (E->isInitCapture(&C) || isa(VD))
-            VisitValueDecl(VD);
-        }
-      }
-    }
-  }
-  void VisitBlockExpr(const BlockExpr *E) {
-    if (!E)
-      return;
-    for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) {
-      if (C.isByRef()) {
-        const VarDecl *VD = C.getVariable();
-        markAsEscaped(VD);
-        if (isa(VD) || VD->isInitCapture())
-          VisitValueDecl(VD);
-      }
-    }
-  }
-  void VisitCallExpr(const CallExpr *E) {
-    if (!E)
-      return;
-    for (const Expr *Arg : E->arguments()) {
-      if (!Arg)
-        continue;
-      if (Arg->isLValue()) {
-        const bool SavedAllEscaped = AllEscaped;
-        AllEscaped = true;
-        Visit(Arg);
-        AllEscaped = SavedAllEscaped;
-      } else {
-        Visit(Arg);
-      }
-    }
-    Visit(E->getCallee());
-  }
-  void VisitDeclRefExpr(const DeclRefExpr *E) {
-    if (!E)
-      return;
-    const ValueDecl *VD = E->getDecl();
-    if (AllEscaped)
-      markAsEscaped(VD);
-    if (isa(VD))
-      VisitValueDecl(VD);
-    else if (const auto *VarD = dyn_cast(VD))
-      if (VarD->isInitCapture())
-        VisitValueDecl(VD);
-  }
-  void VisitUnaryOperator(const UnaryOperator *E) {
-    if (!E)
-      return;
-    if (E->getOpcode() == UO_AddrOf) {
-      const bool SavedAllEscaped = AllEscaped;
-      AllEscaped = true;
-      Visit(E->getSubExpr());
-      AllEscaped = SavedAllEscaped;
-    } else {
-      Visit(E->getSubExpr());
-    }
-  }
-  void VisitImplicitCastExpr(const ImplicitCastExpr *E) {
-    if (!E)
-      return;
-    if (E->getCastKind() == CK_ArrayToPointerDecay) {
-      const bool SavedAllEscaped = AllEscaped;
-      AllEscaped = true;
-      Visit(E->getSubExpr());
-      AllEscaped = SavedAllEscaped;
-    } else {
-      Visit(E->getSubExpr());
-    }
-  }
-  void VisitExpr(const Expr *E) {
-    if (!E)
-      return;
-    bool SavedAllEscaped = AllEscaped;
-    if (!E->isLValue())
-      AllEscaped = false;
-    for (const Stmt *Child : E->children())
-      if (Child)
-        Visit(Child);
-    AllEscaped = SavedAllEscaped;
-  }
-  void VisitStmt(const Stmt *S) {
-    if (!S)
-      return;
-    for (const Stmt *Child : S->children())
-      if (Child)
-        Visit(Child);
-  }
-
-  /// Returns the record that handles all the escaped local variables and used
-  /// instead of their original storage.
-  const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
-    if (!GlobalizedRD)
-      buildRecordForGlobalizedVars(IsInTTDRegion);
-    return GlobalizedRD;
-  }
-
-  /// Returns the field in the globalized record for the escaped variable.
-  const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
-    assert(GlobalizedRD &&
-           "Record for globalized variables must be generated already.");
-    auto I = MappedDeclsFields.find(VD);
-    if (I == MappedDeclsFields.end())
-      return nullptr;
-    return I->getSecond();
-  }
-
-  /// Returns the list of the escaped local variables/parameters.
-  ArrayRef getEscapedDecls() const {
-    return EscapedDecls.getArrayRef();
-  }
-
-  /// Checks if the escaped local variable is actually a parameter passed by
-  /// value.
-  const llvm::SmallPtrSetImpl &getEscapedParameters() const {
-    return EscapedParameters;
-  }
-
-  /// Returns the list of the escaped variables with the variably modified
-  /// types.
-  ArrayRef getEscapedVariableLengthDecls() const {
-    return EscapedVariableLengthDecls.getArrayRef();
-  }
-};
-} // anonymous namespace
-
 /// Get the GPU warp size.
-static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
+llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) {
   return CGF.EmitRuntimeCall(
       llvm::Intrinsic::getDeclaration(
           &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
       "nvptx_warp_size");
 }
-
-/// Get the id of the current thread on the GPU.
-static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
-  return CGF.EmitRuntimeCall(
-      llvm::Intrinsic::getDeclaration(
-          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
-      "nvptx_tid");
-}
-
-/// Get the id of the warp in the block.
-/// We assume that the warp size is 32, which is always the case
-/// on the NVPTX device, to generate more efficient code.
-static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id");
-}
-
-/// Get the id of the current lane in the Warp.
-/// We assume that the warp size is 32, which is always the case
-/// on the NVPTX device, to generate more efficient code.
-static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask),
-                       "nvptx_lane_id");
-}
-
-/// Get the maximum number of threads in a block of the GPU.
-static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
-  return CGF.EmitRuntimeCall(
-      llvm::Intrinsic::getDeclaration(
-          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
-      "nvptx_num_threads");
-}
-
-/// Get the value of the thread_limit clause in the teams directive.
-/// For the 'generic' execution mode, the runtime encodes thread_limit in
-/// the launch parameters, always starting thread_limit+warpSize threads per
-/// CTA. The threads in the last warp are reserved for master execution.
-/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
-static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
-                                   bool IsInSPMDExecutionMode = false) {
-  CGBuilderTy &Bld = CGF.Builder;
-  return IsInSPMDExecutionMode
-             ? getNVPTXNumThreads(CGF)
-             : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
-                                "thread_limit");
-}
-
-/// Get the thread id of the OMP master thread.
-/// The master thread id is the first thread (lane) of the last warp in the
-/// GPU block.  Warp size is assumed to be some power of 2.
-/// Thread id is 0 indexed.
-/// E.g: If NumThreads is 33, master id is 32.
-///      If NumThreads is 64, master id is 32.
-///      If NumThreads is 1024, master id is 992.
-static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
-
-  // We assume that the warp size is a power of 2.
-  llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
-
-  return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
-                       Bld.CreateNot(Mask), "master_tid");
-}
-
-CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
-    CodeGenModule &CGM, SourceLocation Loc)
-    : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()),
-      Loc(Loc) {
-  createWorkerFunction(CGM);
-}
-
-void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
-    CodeGenModule &CGM) {
-  // Create an worker function with no arguments.
-
-  WorkerFn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      /*placeholder=*/"_worker", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI);
-  WorkerFn->setDoesNotRecurse();
-}
-
-CGOpenMPRuntimeNVPTX::ExecutionMode
-CGOpenMPRuntimeNVPTX::getExecutionMode() const {
-  return CurrentExecutionMode;
-}
-
-static CGOpenMPRuntimeNVPTX::DataSharingMode
-getDataSharingMode(CodeGenModule &CGM) {
-  return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeNVPTX::CUDA
-                                          : CGOpenMPRuntimeNVPTX::Generic;
-}
-
-/// Check for inner (nested) SPMD construct, if any
-static bool hasNestedSPMDDirective(ASTContext &Ctx,
-                                   const OMPExecutableDirective &D) {
-  const auto *CS = D.getInnermostCapturedStmt();
-  const auto *Body =
-      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-
-  if (const auto *NestedDir =
-          dyn_cast_or_null(ChildStmt)) {
-    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    switch (D.getDirectiveKind()) {
-    case OMPD_target:
-      if (isOpenMPParallelDirective(DKind))
-        return true;
-      if (DKind == OMPD_teams) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPParallelDirective(DKind))
-            return true;
-        }
-      }
-      return false;
-    case OMPD_target_teams:
-      return isOpenMPParallelDirective(DKind);
-    case OMPD_target_simd:
-    case OMPD_target_parallel:
-    case OMPD_target_parallel_for:
-    case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams_distribute:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_parallel:
-    case OMPD_for:
-    case OMPD_parallel_for:
-    case OMPD_parallel_master:
-    case OMPD_parallel_sections:
-    case OMPD_for_simd:
-    case OMPD_parallel_for_simd:
-    case OMPD_cancel:
-    case OMPD_cancellation_point:
-    case OMPD_ordered:
-    case OMPD_threadprivate:
-    case OMPD_allocate:
-    case OMPD_task:
-    case OMPD_simd:
-    case OMPD_sections:
-    case OMPD_section:
-    case OMPD_single:
-    case OMPD_master:
-    case OMPD_critical:
-    case OMPD_taskyield:
-    case OMPD_barrier:
-    case OMPD_taskwait:
-    case OMPD_taskgroup:
-    case OMPD_atomic:
-    case OMPD_flush:
-    case OMPD_depobj:
-    case OMPD_scan:
-    case OMPD_teams:
-    case OMPD_target_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_enter_data:
-    case OMPD_distribute:
-    case OMPD_distribute_simd:
-    case OMPD_distribute_parallel_for:
-    case OMPD_distribute_parallel_for_simd:
-    case OMPD_teams_distribute:
-    case OMPD_teams_distribute_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
-    case OMPD_target_update:
-    case OMPD_declare_simd:
-    case OMPD_declare_variant:
-    case OMPD_begin_declare_variant:
-    case OMPD_end_declare_variant:
-    case OMPD_declare_target:
-    case OMPD_end_declare_target:
-    case OMPD_declare_reduction:
-    case OMPD_declare_mapper:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
-    case OMPD_master_taskloop:
-    case OMPD_master_taskloop_simd:
-    case OMPD_parallel_master_taskloop:
-    case OMPD_parallel_master_taskloop_simd:
-    case OMPD_requires:
-    case OMPD_unknown:
-    default:
-      llvm_unreachable("Unexpected directive.");
-    }
-  }
-
-  return false;
-}
-
-static bool supportsSPMDExecutionMode(ASTContext &Ctx,
-                                      const OMPExecutableDirective &D) {
-  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
-  switch (DirectiveKind) {
-  case OMPD_target:
-  case OMPD_target_teams:
-    return hasNestedSPMDDirective(Ctx, D);
-  case OMPD_target_parallel:
-  case OMPD_target_parallel_for:
-  case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute_parallel_for_simd:
-  case OMPD_target_simd:
-  case OMPD_target_teams_distribute_simd:
-    return true;
-  case OMPD_target_teams_distribute:
-    return false;
-  case OMPD_parallel:
-  case OMPD_for:
-  case OMPD_parallel_for:
-  case OMPD_parallel_master:
-  case OMPD_parallel_sections:
-  case OMPD_for_simd:
-  case OMPD_parallel_for_simd:
-  case OMPD_cancel:
-  case OMPD_cancellation_point:
-  case OMPD_ordered:
-  case OMPD_threadprivate:
-  case OMPD_allocate:
-  case OMPD_task:
-  case OMPD_simd:
-  case OMPD_sections:
-  case OMPD_section:
-  case OMPD_single:
-  case OMPD_master:
-  case OMPD_critical:
-  case OMPD_taskyield:
-  case OMPD_barrier:
-  case OMPD_taskwait:
-  case OMPD_taskgroup:
-  case OMPD_atomic:
-  case OMPD_flush:
-  case OMPD_depobj:
-  case OMPD_scan:
-  case OMPD_teams:
-  case OMPD_target_data:
-  case OMPD_target_exit_data:
-  case OMPD_target_enter_data:
-  case OMPD_distribute:
-  case OMPD_distribute_simd:
-  case OMPD_distribute_parallel_for:
-  case OMPD_distribute_parallel_for_simd:
-  case OMPD_teams_distribute:
-  case OMPD_teams_distribute_simd:
-  case OMPD_teams_distribute_parallel_for:
-  case OMPD_teams_distribute_parallel_for_simd:
-  case OMPD_target_update:
-  case OMPD_declare_simd:
-  case OMPD_declare_variant:
-  case OMPD_begin_declare_variant:
-  case OMPD_end_declare_variant:
-  case OMPD_declare_target:
-  case OMPD_end_declare_target:
-  case OMPD_declare_reduction:
-  case OMPD_declare_mapper:
-  case OMPD_taskloop:
-  case OMPD_taskloop_simd:
-  case OMPD_master_taskloop:
-  case OMPD_master_taskloop_simd:
-  case OMPD_parallel_master_taskloop:
-  case OMPD_parallel_master_taskloop_simd:
-  case OMPD_requires:
-  case OMPD_unknown:
-  default:
-    break;
-  }
-  llvm_unreachable(
-      "Unknown programming model for OpenMP directive on NVPTX target.");
-}
-
-/// Check if the directive is loops based and has schedule clause at all or has
-/// static scheduling.
-static bool hasStaticScheduling(const OMPExecutableDirective &D) {
-  assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
-         isOpenMPLoopDirective(D.getDirectiveKind()) &&
-         "Expected loop-based directive.");
-  return !D.hasClausesOfKind() &&
-         (!D.hasClausesOfKind() ||
-          llvm::any_of(D.getClausesOfKind(),
-                       [](const OMPScheduleClause *C) {
-                         return C->getScheduleKind() == OMPC_SCHEDULE_static;
-                       }));
-}
-
-/// Check for inner (nested) lightweight runtime construct, if any
-static bool hasNestedLightweightDirective(ASTContext &Ctx,
-                                          const OMPExecutableDirective &D) {
-  assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
-  const auto *CS = D.getInnermostCapturedStmt();
-  const auto *Body =
-      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-
-  if (const auto *NestedDir =
-          dyn_cast_or_null(ChildStmt)) {
-    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    switch (D.getDirectiveKind()) {
-    case OMPD_target:
-      if (isOpenMPParallelDirective(DKind) &&
-          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
-          hasStaticScheduling(*NestedDir))
-        return true;
-      if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
-        return true;
-      if (DKind == OMPD_parallel) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-        }
-      } else if (DKind == OMPD_teams) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPParallelDirective(DKind) &&
-              isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-          if (DKind == OMPD_parallel) {
-            Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
-                /*IgnoreCaptured=*/true);
-            if (!Body)
-              return false;
-            ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-            if (const auto *NND =
-                    dyn_cast_or_null(ChildStmt)) {
-              DKind = NND->getDirectiveKind();
-              if (isOpenMPWorksharingDirective(DKind) &&
-                  isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-                return true;
-            }
-          }
-        }
-      }
-      return false;
-    case OMPD_target_teams:
-      if (isOpenMPParallelDirective(DKind) &&
-          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
-          hasStaticScheduling(*NestedDir))
-        return true;
-      if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
-        return true;
-      if (DKind == OMPD_parallel) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-        }
-      }
-      return false;
-    case OMPD_target_parallel:
-      if (DKind == OMPD_simd)
-        return true;
-      return isOpenMPWorksharingDirective(DKind) &&
-             isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
-    case OMPD_target_teams_distribute:
-    case OMPD_target_simd:
-    case OMPD_target_parallel_for:
-    case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_parallel:
-    case OMPD_for:
-    case OMPD_parallel_for:
-    case OMPD_parallel_master:
-    case OMPD_parallel_sections:
-    case OMPD_for_simd:
-    case OMPD_parallel_for_simd:
-    case OMPD_cancel:
-    case OMPD_cancellation_point:
-    case OMPD_ordered:
-    case OMPD_threadprivate:
-    case OMPD_allocate:
-    case OMPD_task:
-    case OMPD_simd:
-    case OMPD_sections:
-    case OMPD_section:
-    case OMPD_single:
-    case OMPD_master:
-    case OMPD_critical:
-    case OMPD_taskyield:
-    case OMPD_barrier:
-    case OMPD_taskwait:
-    case OMPD_taskgroup:
-    case OMPD_atomic:
-    case OMPD_flush:
-    case OMPD_depobj:
-    case OMPD_scan:
-    case OMPD_teams:
-    case OMPD_target_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_enter_data:
-    case OMPD_distribute:
-    case OMPD_distribute_simd:
-    case OMPD_distribute_parallel_for:
-    case OMPD_distribute_parallel_for_simd:
-    case OMPD_teams_distribute:
-    case OMPD_teams_distribute_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
-    case OMPD_target_update:
-    case OMPD_declare_simd:
-    case OMPD_declare_variant:
-    case OMPD_begin_declare_variant:
-    case OMPD_end_declare_variant:
-    case OMPD_declare_target:
-    case OMPD_end_declare_target:
-    case OMPD_declare_reduction:
-    case OMPD_declare_mapper:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
-    case OMPD_master_taskloop:
-    case OMPD_master_taskloop_simd:
-    case OMPD_parallel_master_taskloop:
-    case OMPD_parallel_master_taskloop_simd:
-    case OMPD_requires:
-    case OMPD_unknown:
-    default:
-      llvm_unreachable("Unexpected directive.");
-    }
-  }
-
-  return false;
-}
-
-/// Checks if the construct supports lightweight runtime. It must be SPMD
-/// construct + inner loop-based construct with static scheduling.
-static bool supportsLightweightRuntime(ASTContext &Ctx,
-                                       const OMPExecutableDirective &D) {
-  if (!supportsSPMDExecutionMode(Ctx, D))
-    return false;
-  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
-  switch (DirectiveKind) {
-  case OMPD_target:
-  case OMPD_target_teams:
-  case OMPD_target_parallel:
-    return hasNestedLightweightDirective(Ctx, D);
-  case OMPD_target_parallel_for:
-  case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute_parallel_for_simd:
-    // (Last|First)-privates must be shared in parallel region.
-    return hasStaticScheduling(D);
-  case OMPD_target_simd:
-  case OMPD_target_teams_distribute_simd:
-    return true;
-  case OMPD_target_teams_distribute:
-    return false;
-  case OMPD_parallel:
-  case OMPD_for:
-  case OMPD_parallel_for:
-  case OMPD_parallel_master:
-  case OMPD_parallel_sections:
-  case OMPD_for_simd:
-  case OMPD_parallel_for_simd:
-  case OMPD_cancel:
-  case OMPD_cancellation_point:
-  case OMPD_ordered:
-  case OMPD_threadprivate:
-  case OMPD_allocate:
-  case OMPD_task:
-  case OMPD_simd:
-  case OMPD_sections:
-  case OMPD_section:
-  case OMPD_single:
-  case OMPD_master:
-  case OMPD_critical:
-  case OMPD_taskyield:
-  case OMPD_barrier:
-  case OMPD_taskwait:
-  case OMPD_taskgroup:
-  case OMPD_atomic:
-  case OMPD_flush:
-  case OMPD_depobj:
-  case OMPD_scan:
-  case OMPD_teams:
-  case OMPD_target_data:
-  case OMPD_target_exit_data:
-  case OMPD_target_enter_data:
-  case OMPD_distribute:
-  case OMPD_distribute_simd:
-  case OMPD_distribute_parallel_for:
-  case OMPD_distribute_parallel_for_simd:
-  case OMPD_teams_distribute:
-  case OMPD_teams_distribute_simd:
-  case OMPD_teams_distribute_parallel_for:
-  case OMPD_teams_distribute_parallel_for_simd:
-  case OMPD_target_update:
-  case OMPD_declare_simd:
-  case OMPD_declare_variant:
-  case OMPD_begin_declare_variant:
-  case OMPD_end_declare_variant:
-  case OMPD_declare_target:
-  case OMPD_end_declare_target:
-  case OMPD_declare_reduction:
-  case OMPD_declare_mapper:
-  case OMPD_taskloop:
-  case OMPD_taskloop_simd:
-  case OMPD_master_taskloop:
-  case OMPD_master_taskloop_simd:
-  case OMPD_parallel_master_taskloop:
-  case OMPD_parallel_master_taskloop_simd:
-  case OMPD_requires:
-  case OMPD_unknown:
-  default:
-    break;
-  }
-  llvm_unreachable(
-      "Unknown programming model for OpenMP directive on NVPTX target.");
-}
-
-void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
-                                             StringRef ParentName,
-                                             llvm::Function *&OutlinedFn,
-                                             llvm::Constant *&OutlinedFnID,
-                                             bool IsOffloadEntry,
-                                             const RegionCodeGenTy &CodeGen) {
-  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
-  EntryFunctionState EST;
-  WorkerFunctionState WST(CGM, D.getBeginLoc());
-  Work.clear();
-  WrapperFunctionsMap.clear();
-
-  // Emit target region as a standalone region.
-  class NVPTXPrePostActionTy : public PrePostActionTy {
-    CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
-    CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
-
-  public:
-    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
-                         CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
-        : EST(EST), WST(WST) {}
-    void Enter(CodeGenFunction &CGF) override {
-      auto &RT =
-          static_cast(CGF.CGM.getOpenMPRuntime());
-      RT.emitNonSPMDEntryHeader(CGF, EST, WST);
-      // Skip target region initialization.
-      RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
-    }
-    void Exit(CodeGenFunction &CGF) override {
-      auto &RT =
-          static_cast(CGF.CGM.getOpenMPRuntime());
-      RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitNonSPMDEntryFooter(CGF, EST);
-    }
-  } Action(EST, WST);
-  CodeGen.setAction(Action);
-  IsInTTDRegion = true;
-  // Reserve place for the globalized memory.
-  GlobalizedRecords.emplace_back();
-  if (!KernelStaticGlobalized) {
-    KernelStaticGlobalized = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
-        llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
-        "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
-        llvm::GlobalValue::NotThreadLocal,
-        CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
-  }
-  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
-                                   IsOffloadEntry, CodeGen);
-  IsInTTDRegion = false;
-
-  // Now change the name of the worker function to correspond to this target
-  // region's entry function.
-  WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker"));
-
-  // Create the worker function
-  emitWorkerFunction(WST);
-}
-
-// Setup NVPTX threads for master-worker OpenMP scheme.
-void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryHeader(CodeGenFunction &CGF,
-                                                  EntryFunctionState &EST,
-                                                  WorkerFunctionState &WST) {
-  CGBuilderTy &Bld = CGF.Builder;
-
-  llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
-  llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
-  llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
-  EST.ExitBB = CGF.createBasicBlock(".exit");
-
-  llvm::Value *IsWorker =
-      Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
-  Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
-
-  CGF.EmitBlock(WorkerBB);
-  emitCall(CGF, WST.Loc, WST.WorkerFn);
-  CGF.EmitBranch(EST.ExitBB);
-
-  CGF.EmitBlock(MasterCheckBB);
-  llvm::Value *IsMaster =
-      Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
-  Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
-
-  CGF.EmitBlock(MasterBB);
-  IsInTargetMasterThreadRegion = true;
-  // SEQUENTIAL (MASTER) REGION START
-  // First action in sequential region:
-  // Initialize the state of the OpenMP runtime library on the GPU.
-  // TODO: Optimize runtime initialization and pass in correct value.
-  llvm::Value *Args[] = {getThreadLimit(CGF),
-                         Bld.getInt16(/*RequiresOMPRuntime=*/1)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
-
-  // For data sharing, we need to initialize the stack.
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
-
-  emitGenericVarsProlog(CGF, WST.Loc);
-}
-
-void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
-                                                  EntryFunctionState &EST) {
-  IsInTargetMasterThreadRegion = false;
-  if (!CGF.HaveInsertPoint())
-    return;
-
-  emitGenericVarsEpilog(CGF);
-
-  if (!EST.ExitBB)
-    EST.ExitBB = CGF.createBasicBlock(".exit");
-
-  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
-  CGF.EmitBranch(TerminateBB);
-
-  CGF.EmitBlock(TerminateBB);
-  // Signal termination condition.
-  // TODO: Optimize runtime initialization and pass in correct value.
-  llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
-  // Barrier to terminate worker threads.
-  syncCTAThreads(CGF);
-  // Master thread jumps to exit point.
-  CGF.EmitBranch(EST.ExitBB);
-
-  CGF.EmitBlock(EST.ExitBB);
-  EST.ExitBB = nullptr;
-}
-
-void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
-                                          StringRef ParentName,
-                                          llvm::Function *&OutlinedFn,
-                                          llvm::Constant *&OutlinedFnID,
-                                          bool IsOffloadEntry,
-                                          const RegionCodeGenTy &CodeGen) {
-  ExecutionRuntimeModesRAII ModeRAII(
-      CurrentExecutionMode, RequiresFullRuntime,
-      CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
-          !supportsLightweightRuntime(CGM.getContext(), D));
-  EntryFunctionState EST;
-
-  // Emit target region as a standalone region.
-  class NVPTXPrePostActionTy : public PrePostActionTy {
-    CGOpenMPRuntimeNVPTX &RT;
-    CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
-    const OMPExecutableDirective &D;
-
-  public:
-    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
-                         CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
-                         const OMPExecutableDirective &D)
-        : RT(RT), EST(EST), D(D) {}
-    void Enter(CodeGenFunction &CGF) override {
-      RT.emitSPMDEntryHeader(CGF, EST, D);
-      // Skip target region initialization.
-      RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
-    }
-    void Exit(CodeGenFunction &CGF) override {
-      RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitSPMDEntryFooter(CGF, EST);
-    }
-  } Action(*this, EST, D);
-  CodeGen.setAction(Action);
-  IsInTTDRegion = true;
-  // Reserve place for the globalized memory.
-  GlobalizedRecords.emplace_back();
-  if (!KernelStaticGlobalized) {
-    KernelStaticGlobalized = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
-        llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
-        "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
-        llvm::GlobalValue::NotThreadLocal,
-        CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
-  }
-  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
-                                   IsOffloadEntry, CodeGen);
-  IsInTTDRegion = false;
-}
-
-void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
-    CodeGenFunction &CGF, EntryFunctionState &EST,
-    const OMPExecutableDirective &D) {
-  CGBuilderTy &Bld = CGF.Builder;
-
-  // Setup BBs in entry function.
-  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
-  EST.ExitBB = CGF.createBasicBlock(".exit");
-
-  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
-                         /*RequiresOMPRuntime=*/
-                         Bld.getInt16(RequiresFullRuntime ? 1 : 0),
-                         /*RequiresDataSharing=*/Bld.getInt16(0)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
-
-  if (RequiresFullRuntime) {
-    // For data sharing, we need to initialize the stack.
-    CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-        OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
-  }
-
-  CGF.EmitBranch(ExecuteBB);
-
-  CGF.EmitBlock(ExecuteBB);
-
-  IsInTargetMasterThreadRegion = true;
-}
-
-void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF,
-                                               EntryFunctionState &EST) {
-  IsInTargetMasterThreadRegion = false;
-  if (!CGF.HaveInsertPoint())
-    return;
-
-  if (!EST.ExitBB)
-    EST.ExitBB = CGF.createBasicBlock(".exit");
-
-  llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
-  CGF.EmitBranch(OMPDeInitBB);
-
-  CGF.EmitBlock(OMPDeInitBB);
-  // DeInitialize the OMP state in the runtime; called by all active threads.
-  llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
-                         CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
-  CGF.EmitBranch(EST.ExitBB);
-
-  CGF.EmitBlock(EST.ExitBB);
-  EST.ExitBB = nullptr;
-}
-
-// Create a unique global variable to indicate the execution mode of this target
-// region. The execution mode is either 'generic', or 'spmd' depending on the
-// target directive. This variable is picked up by the offload library to setup
-// the device appropriately before kernel launch. If the execution mode is
-// 'generic', the runtime reserves one warp for the master, otherwise, all
-// warps participate in parallel work.
-static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
-                                     bool Mode) {
-  auto *GVMode =
-      new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
-                               llvm::GlobalValue::WeakAnyLinkage,
-                               llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1),
-                               Twine(Name, "_exec_mode"));
-  CGM.addCompilerUsedGlobal(GVMode);
-}
-
-void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
-  ASTContext &Ctx = CGM.getContext();
-
-  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
-  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {},
-                    WST.Loc, WST.Loc);
-  emitWorkerLoop(CGF, WST);
-  CGF.FinishFunction();
-}
-
-void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
-                                          WorkerFunctionState &WST) {
-  //
-  // The workers enter this loop and wait for parallel work from the master.
-  // When the master encounters a parallel region it sets up the work + variable
-  // arguments, and wakes up the workers.  The workers first check to see if
-  // they are required for the parallel region, i.e., within the # of requested
-  // parallel threads.  The activated workers load the variable arguments and
-  // execute the parallel work.
-  //
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work");
-  llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers");
-  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel");
-  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel");
-  llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel");
-  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-
-  CGF.EmitBranch(AwaitBB);
-
-  // Workers wait for work from master.
-  CGF.EmitBlock(AwaitBB);
-  // Wait for parallel work
-  syncCTAThreads(CGF);
-
-  Address WorkFn =
-      CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn");
-  Address ExecStatus =
-      CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status");
-  CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
-  CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
-
-  // TODO: Optimize runtime initialization and pass in correct value.
-  llvm::Value *Args[] = {WorkFn.getPointer(),
-                         /*RequiresOMPRuntime=*/Bld.getInt16(1)};
-  llvm::Value *Ret = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
-  Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
-
-  // On termination condition (workid == 0), exit loop.
-  llvm::Value *WorkID = Bld.CreateLoad(WorkFn);
-  llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID, "should_terminate");
-  Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
-
-  // Activate requested workers.
-  CGF.EmitBlock(SelectWorkersBB);
-  llvm::Value *IsActive =
-      Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active");
-  Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
-
-  // Signal start of parallel region.
-  CGF.EmitBlock(ExecuteBB);
-  // Skip initialization.
-  setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
-
-  // Process work items: outlined parallel functions.
-  for (llvm::Function *W : Work) {
-    // Try to match this outlined function.
-    llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
-
-    llvm::Value *WorkFnMatch =
-        Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");
-
-    llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn");
-    llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next");
-    Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
-
-    // Execute this outlined function.
-    CGF.EmitBlock(ExecuteFNBB);
-
-    // Insert call to work function via shared wrapper. The shared
-    // wrapper takes two arguments:
-    //   - the parallelism level;
-    //   - the thread ID;
-    emitCall(CGF, WST.Loc, W,
-             {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
-
-    // Go to end of parallel region.
-    CGF.EmitBranch(TerminateBB);
-
-    CGF.EmitBlock(CheckNextBB);
-  }
-  // Default case: call to outlined function through pointer if the target
-  // region makes a declare target call that may contain an orphaned parallel
-  // directive.
-  auto *ParallelFnTy =
-      llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
-                              /*isVarArg=*/false);
-  llvm::Value *WorkFnCast =
-      Bld.CreateBitCast(WorkID, ParallelFnTy->getPointerTo());
-  // Insert call to work function via shared wrapper. The shared
-  // wrapper takes two arguments:
-  //   - the parallelism level;
-  //   - the thread ID;
-  emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast},
-           {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
-  // Go to end of parallel region.
-  CGF.EmitBranch(TerminateBB);
-
-  // Signal end of parallel region.
-  CGF.EmitBlock(TerminateBB);
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
-      llvm::None);
-  CGF.EmitBranch(BarrierBB);
-
-  // All active and inactive workers wait at a barrier after parallel region.
-  CGF.EmitBlock(BarrierBB);
-  // Barrier after parallel region.
-  syncCTAThreads(CGF);
-  CGF.EmitBranch(AwaitBB);
-
-  // Exit target region.
-  CGF.EmitBlock(ExitBB);
-  // Skip initialization.
-  clearLocThreadIdInsertPt(CGF);
-}
-
-/// Returns specified OpenMP runtime function for the current OpenMP
-/// implementation.  Specialized for the NVPTX device.
-/// \param Function OpenMP runtime function.
-/// \return Specified function.
-llvm::FunctionCallee
-CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
-  llvm::FunctionCallee RTLFn = nullptr;
-  switch (static_cast(Function)) {
-  case OMPRTL_NVPTX__kmpc_kernel_init: {
-    // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
-    // RequiresOMPRuntime);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_deinit: {
-    // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
-    // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
-    // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
-    // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
-    /// Build void __kmpc_kernel_prepare_parallel(
-    /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
-    /// Build bool __kmpc_kernel_parallel(void **outlined_function,
-    /// int16_t IsOMPRuntimeInitialized);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
-    llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
-    auto *FnTy =
-        llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
-    /// Build void __kmpc_kernel_end_parallel();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_serialized_parallel: {
-    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
-    // global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
-    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
-    // global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_shuffle_int32: {
-    // Build int32_t __kmpc_shuffle_int32(int32_t element,
-    // int16_t lane_offset, int16_t warp_size);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_shuffle_int64: {
-    // Build int64_t __kmpc_shuffle_int64(int64_t element,
-    // int16_t lane_offset, int16_t warp_size);
-    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
-    // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
-    // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
-    // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
-    // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
-    // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
-    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
-                                             CGM.Int16Ty, CGM.Int16Ty};
-    auto *ShuffleReduceFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
-    auto *InterWarpCopyFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
-                                CGM.Int32Ty,
-                                CGM.Int32Ty,
-                                CGM.SizeTy,
-                                CGM.VoidPtrTy,
-                                ShuffleReduceFnTy->getPointerTo(),
-                                InterWarpCopyFnTy->getPointerTo()};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
-    // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
-    // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
-    // global_tid, void *global_buffer, int32_t num_of_records, void*
-    // reduce_data,
-    // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
-    // lane_offset, int16_t shortCircuit),
-    // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
-    // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
-    // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
-    // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
-    // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
-    // *buffer, int idx, void *reduce_data));
-    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
-                                             CGM.Int16Ty, CGM.Int16Ty};
-    auto *ShuffleReduceFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
-    auto *InterWarpCopyFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy,
-                                          CGM.VoidPtrTy};
-    auto *GlobalListFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
-                                CGM.Int32Ty,
-                                CGM.VoidPtrTy,
-                                CGM.Int32Ty,
-                                CGM.VoidPtrTy,
-                                ShuffleReduceFnTy->getPointerTo(),
-                                InterWarpCopyFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo()};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
-    /// Build void __kmpc_data_sharing_init_stack();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
-    /// Build void __kmpc_data_sharing_init_stack_spmd();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn =
-        CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
-    // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
-    // int16_t UseSharedMemory);
-    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
-    // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t
-    // UseSharedMemory);
-    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
-    // Build void __kmpc_data_sharing_pop_stack(void *a);
-    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy,
-                                      /*Name=*/"__kmpc_data_sharing_pop_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
-    /// Build void __kmpc_begin_sharing_variables(void ***args,
-    /// size_t n_args);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
-    /// Build void __kmpc_end_sharing_variables();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_get_shared_variables: {
-    /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_parallel_level: {
-    // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
-    // Build int8_t __kmpc_is_spmd_exec_mode();
-    auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
-    // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
-    // const void *buf, size_t size, int16_t is_shared, const void **res);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
-                                CGM.Int16Ty, CGM.VoidPtrPtrTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
-    // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
-    // int16_t is_shared);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
-    RTLFn =
-        CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
-    break;
-  }
-  case OMPRTL__kmpc_barrier: {
-    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn =
-        CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
-    break;
-  }
-  case OMPRTL__kmpc_barrier_simple_spmd: {
-    // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
-    // global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateConvergentRuntimeFunction(
-        FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
-    // Build int32_t __kmpc_warp_active_thread_mask(void);
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
-    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_syncwarp: {
-    // Build void __kmpc_syncwarp(kmp_int32 Mask);
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
-    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp");
-    break;
-  }
-  }
-  return RTLFn;
-}
-
-void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
-                                              llvm::Constant *Addr,
-                                              uint64_t Size, int32_t,
-                                              llvm::GlobalValue::LinkageTypes) {
-  // TODO: Add support for global variables on the device after declare target
-  // support.
-  if (!isa(Addr))
-    return;
-  llvm::Module &M = CGM.getModule();
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
-  // Get "nvvm.annotations" metadata node
-  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-  // Append metadata to nvvm.annotations
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-}
-
-void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
-    const OMPExecutableDirective &D, StringRef ParentName,
-    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
-    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
-  if (!IsOffloadEntry) // Nothing to do.
-    return;
-
-  assert(!ParentName.empty() && "Invalid target region parent name!");
-
-  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
-  if (Mode)
-    emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
-                   CodeGen);
-  else
-    emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
-                      CodeGen);
-
-  setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
-}
-
-namespace {
-LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
-/// Enum for accesseing the reserved_2 field of the ident_t struct.
-enum ModeFlagsTy : unsigned {
-  /// Bit set to 1 when in SPMD mode.
-  KMP_IDENT_SPMD_MODE = 0x01,
-  /// Bit set to 1 when a simplified runtime is used.
-  KMP_IDENT_SIMPLE_RT_MODE = 0x02,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
-};
-
-/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime.
-static const ModeFlagsTy UndefinedMode =
-    (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
-} // anonymous namespace
-
-unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const {
-  switch (getExecutionMode()) {
-  case EM_SPMD:
-    if (requiresFullRuntime())
-      return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
-    return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
-  case EM_NonSPMD:
-    assert(requiresFullRuntime() && "Expected full runtime.");
-    return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
-  case EM_Unknown:
-    return UndefinedMode;
-  }
-  llvm_unreachable("Unknown flags are requested.");
-}
-
-CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
-    : CGOpenMPRuntime(CGM, "_", "$") {
-  if (!CGM.getLangOpts().OpenMPIsDevice)
-    llvm_unreachable("OpenMP NVPTX can only handle device code.");
-}
-
-void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
-                                              ProcBindKind ProcBind,
-                                              SourceLocation Loc) {
-  // Do nothing in case of SPMD mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-
-  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
-}
-
-void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
-                                                llvm::Value *NumThreads,
-                                                SourceLocation Loc) {
-  // Do nothing in case of SPMD mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-
-  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
-}
-
-void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
-                                              const Expr *NumTeams,
-                                              const Expr *ThreadLimit,
-                                              SourceLocation Loc) {}
-
-llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
-    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
-    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
-  // Emit target region as a standalone region.
-  class NVPTXPrePostActionTy : public PrePostActionTy {
-    bool &IsInParallelRegion;
-    bool PrevIsInParallelRegion;
-
-  public:
-    NVPTXPrePostActionTy(bool &IsInParallelRegion)
-        : IsInParallelRegion(IsInParallelRegion) {}
-    void Enter(CodeGenFunction &CGF) override {
-      PrevIsInParallelRegion = IsInParallelRegion;
-      IsInParallelRegion = true;
-    }
-    void Exit(CodeGenFunction &CGF) override {
-      IsInParallelRegion = PrevIsInParallelRegion;
-    }
-  } Action(IsInParallelRegion);
-  CodeGen.setAction(Action);
-  bool PrevIsInTTDRegion = IsInTTDRegion;
-  IsInTTDRegion = false;
-  bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
-  IsInTargetMasterThreadRegion = false;
-  auto *OutlinedFun =
-      cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
-          D, ThreadIDVar, InnermostKind, CodeGen));
-  if (CGM.getLangOpts().Optimize) {
-    OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
-    OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
-    OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
-  }
-  IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
-  IsInTTDRegion = PrevIsInTTDRegion;
-  if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      !IsInParallelRegion) {
-    llvm::Function *WrapperFun =
-        createParallelDataSharingWrapper(OutlinedFun, D);
-    WrapperFunctionsMap[OutlinedFun] = WrapperFun;
-  }
-
-  return OutlinedFun;
-}
-
-/// Get list of lastprivate variables from the teams distribute ... or
-/// teams {distribute ...} directives.
-static void
-getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
-                             llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
-  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
-         "expected teams directive.");
-  const OMPExecutableDirective *Dir = &D;
-  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
-    if (const Stmt *S = CGOpenMPRuntime::getSingleCompoundChild(
-            Ctx,
-            D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
-                /*IgnoreCaptured=*/true))) {
-      Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
-      if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))
-        Dir = nullptr;
-    }
-  }
-  if (!Dir)
-    return;
-  for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
-    for (const Expr *E : C->getVarRefs())
-      Vars.push_back(getPrivateItem(E));
-  }
-}
-
-/// Get list of reduction variables from the teams ... directives.
-static void
-getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D,
-                      llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
-  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
-         "expected teams directive.");
-  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
-    for (const Expr *E : C->privates())
-      Vars.push_back(getPrivateItem(E));
-  }
-}
-
-llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
-    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
-    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
-  SourceLocation Loc = D.getBeginLoc();
-
-  const RecordDecl *GlobalizedRD = nullptr;
-  llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
-  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *, 16> MappedDeclsFields;
-  // Globalize team reductions variable unconditionally in all modes.
-  if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
-    getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
-    getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
-    if (!LastPrivatesReductions.empty()) {
-      GlobalizedRD = ::buildRecordForGlobalizedVars(
-          CGM.getContext(), llvm::None, LastPrivatesReductions,
-          MappedDeclsFields, WarpSize);
-    }
-  } else if (!LastPrivatesReductions.empty()) {
-    assert(!TeamAndReductions.first &&
-           "Previous team declaration is not expected.");
-    TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
-    std::swap(TeamAndReductions.second, LastPrivatesReductions);
-  }
-
-  // Emit target region as a standalone region.
-  class NVPTXPrePostActionTy : public PrePostActionTy {
-    SourceLocation &Loc;
-    const RecordDecl *GlobalizedRD;
-    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *, 16>
-        &MappedDeclsFields;
-
-  public:
-    NVPTXPrePostActionTy(
-        SourceLocation &Loc, const RecordDecl *GlobalizedRD,
-        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *, 16>
-            &MappedDeclsFields)
-        : Loc(Loc), GlobalizedRD(GlobalizedRD),
-          MappedDeclsFields(MappedDeclsFields) {}
-    void Enter(CodeGenFunction &CGF) override {
-      auto &Rt =
-          static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
-      if (GlobalizedRD) {
-        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
-        I->getSecond().GlobalRecord = GlobalizedRD;
-        I->getSecond().MappedParams =
-            std::make_unique<CodeGenFunction::OMPMapVars>();
-        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
-        for (const auto &Pair : MappedDeclsFields) {
-          assert(Pair.getFirst()->isCanonicalDecl() &&
-                 "Expected canonical declaration");
-          Data.insert(std::make_pair(Pair.getFirst(),
-                                     MappedVarData(Pair.getSecond(),
-                                                   /*IsOnePerTeam=*/true)));
-        }
-      }
-      Rt.emitGenericVarsProlog(CGF, Loc);
-    }
-    void Exit(CodeGenFunction &CGF) override {
-      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
-          .emitGenericVarsEpilog(CGF);
-    }
-  } Action(Loc, GlobalizedRD, MappedDeclsFields);
-  CodeGen.setAction(Action);
-  llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
-      D, ThreadIDVar, InnermostKind, CodeGen);
-  if (CGM.getLangOpts().Optimize) {
-    OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
-    OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
-    OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
-  }
-
-  return OutlinedFun;
-}
-
-void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
-                                                 SourceLocation Loc,
-                                                 bool WithSPMDCheck) {
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
-      getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
-  if (I == FunctionGlobalizedDecls.end())
-    return;
-  if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
-    QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
-    QualType SecGlobalRecTy;
-
-    // Recover pointer to this function's global record. The runtime will
-    // handle the specifics of the allocation of the memory.
-    // Use actual memory size of the record including the padding
-    // for alignment purposes.
-    unsigned Alignment =
-        CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
-    unsigned GlobalRecordSize =
-        CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity();
-    GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
-
-    llvm::PointerType *GlobalRecPtrTy =
-        CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
-    llvm::Value *GlobalRecCastAddr;
-    llvm::Value *IsTTD = nullptr;
-    if (!IsInTTDRegion &&
-        (WithSPMDCheck ||
-         getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
-      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-      llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
-      llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
-      if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
-        llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
-        llvm::Value *ThreadID = getThreadID(CGF, Loc);
-        llvm::Value *PL = CGF.EmitRuntimeCall(
-            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
-            {RTLoc, ThreadID});
-        IsTTD = Bld.CreateIsNull(PL);
-      }
-      llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
-      Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
-      // There is no need to emit line number for unconditional branch.
-      (void)ApplyDebugLocation::CreateEmpty(CGF);
-      CGF.EmitBlock(SPMDBB);
-      Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
-                               CharUnits::fromQuantity(Alignment));
-      CGF.EmitBranch(ExitBB);
-      // There is no need to emit line number for unconditional branch.
-      (void)ApplyDebugLocation::CreateEmpty(CGF);
-      CGF.EmitBlock(NonSPMDBB);
-      llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
-      if (const RecordDecl *SecGlobalizedVarsRecord =
-              I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
-        SecGlobalRecTy =
-            CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
-
-        // Recover pointer to this function's global record. The runtime will
-        // handle the specifics of the allocation of the memory.
-        // Use actual memory size of the record including the padding
-        // for alignment purposes.
-        unsigned Alignment =
-            CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
-        unsigned GlobalRecordSize =
-            CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
-        GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
-        Size = Bld.CreateSelect(
-            IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
-      }
-      // TODO: allow the usage of shared memory to be controlled by
-      // the user, for now, default to global.
-      llvm::Value *GlobalRecordSizeArg[] = {
-          Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
-      llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(
-              OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
-          GlobalRecordSizeArg);
-      GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-          GlobalRecValue, GlobalRecPtrTy);
-      CGF.EmitBlock(ExitBB);
-      auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
-                                /*NumReservedValues=*/2, "_select_stack");
-      Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
-      Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
-      GlobalRecCastAddr = Phi;
-      I->getSecond().GlobalRecordAddr = Phi;
-      I->getSecond().IsInSPMDModeFlag = IsSPMD;
-    } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
-      assert(GlobalizedRecords.back().Records.size() < 2 &&
-             "Expected less than 2 globalized records: one for target and one "
-             "for teams.");
-      unsigned Offset = 0;
-      for (const RecordDecl *RD : GlobalizedRecords.back().Records) {
-        QualType RDTy = CGM.getContext().getRecordType(RD);
-        unsigned Alignment =
-            CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
-        unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
-        Offset =
-            llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
-      }
-      unsigned Alignment =
-          CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
-      Offset = llvm::alignTo(Offset, Alignment);
-      GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
-      ++GlobalizedRecords.back().RegionCounter;
-      if (GlobalizedRecords.back().Records.size() == 1) {
-        assert(KernelStaticGlobalized &&
-               "Kernel static pointer must be initialized already.");
-        auto *UseSharedMemory = new llvm::GlobalVariable(
-            CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
-            llvm::GlobalValue::InternalLinkage, nullptr,
-            "_openmp_static_kernel$is_shared");
-        UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-        QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
-            /*DestWidth=*/16, /*Signed=*/0);
-        llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
-            Address(UseSharedMemory,
-                    CGM.getContext().getTypeAlignInChars(Int16Ty)),
-            /*Volatile=*/false, Int16Ty, Loc);
-        auto *StaticGlobalized = new llvm::GlobalVariable(
-            CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
-            llvm::GlobalValue::CommonLinkage, nullptr);
-        auto *RecSize = new llvm::GlobalVariable(
-            CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
-            llvm::GlobalValue::InternalLinkage, nullptr,
-            "_openmp_static_kernel$size");
-        RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-        llvm::Value *Ld = CGF.EmitLoadOfScalar(
-            Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
-            CGM.getContext().getSizeType(), Loc);
-        llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-            KernelStaticGlobalized, CGM.VoidPtrPtrTy);
-        llvm::Value *GlobalRecordSizeArg[] = {
-            llvm::ConstantInt::get(
-                CGM.Int16Ty,
-                getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
-            StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
-        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-                                OMPRTL_NVPTX__kmpc_get_team_static_memory),
-                            GlobalRecordSizeArg);
-        GlobalizedRecords.back().Buffer = StaticGlobalized;
-        GlobalizedRecords.back().RecSize = RecSize;
-        GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
-        GlobalizedRecords.back().Loc = Loc;
-      }
-      assert(KernelStaticGlobalized && "Global address must be set already.");
-      Address FrameAddr = CGF.EmitLoadOfPointer(
-          Address(KernelStaticGlobalized, CGM.getPointerAlign()),
-          CGM.getContext()
-              .getPointerType(CGM.getContext().VoidPtrTy)
-              .castAs<PointerType>());
-      llvm::Value *GlobalRecValue =
-          Bld.CreateConstInBoundsGEP(FrameAddr, Offset).getPointer();
-      I->getSecond().GlobalRecordAddr = GlobalRecValue;
-      I->getSecond().IsInSPMDModeFlag = nullptr;
-      GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-          GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo());
-    } else {
-      // TODO: allow the usage of shared memory to be controlled by
-      // the user, for now, default to global.
-      bool UseSharedMemory =
-          IsInTTDRegion && GlobalRecordSize <= SharedMemorySize;
-      llvm::Value *GlobalRecordSizeArg[] = {
-          llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
-          CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
-      llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(
-              IsInTTDRegion
-                  ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
-                  : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
-          GlobalRecordSizeArg);
-      GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-          GlobalRecValue, GlobalRecPtrTy);
-      I->getSecond().GlobalRecordAddr = GlobalRecValue;
-      I->getSecond().IsInSPMDModeFlag = nullptr;
-    }
-    LValue Base =
-        CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy);
-
-    // Emit the "global alloca" which is a GEP from the global declaration
-    // record using the pointer returned by the runtime.
-    LValue SecBase;
-    decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
-    if (IsTTD) {
-      SecIt = I->getSecond().SecondaryLocalVarData->begin();
-      llvm::PointerType *SecGlobalRecPtrTy =
-          CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
-      SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
-          Bld.CreatePointerBitCastOrAddrSpaceCast(
-              I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
-          SecGlobalRecTy);
-    }
-    for (auto &Rec : I->getSecond().LocalVarData) {
-      bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
-      llvm::Value *ParValue;
-      if (EscapedParam) {
-        const auto *VD = cast<VarDecl>(Rec.first);
-        LValue ParLVal =
-            CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
-        ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
-      }
-      LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD);
-      // Emit VarAddr basing on lane-id if required.
-      QualType VarTy;
-      if (Rec.second.IsOnePerTeam) {
-        VarTy = Rec.second.FD->getType();
-      } else {
-        llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
-            VarAddr.getAddress(CGF).getPointer(),
-            {Bld.getInt32(0), getNVPTXLaneID(CGF)});
-        VarTy =
-            Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
-        VarAddr = CGF.MakeAddrLValue(
-            Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
-            AlignmentSource::Decl);
-      }
-      Rec.second.PrivateAddr = VarAddr.getAddress(CGF);
-      if (!IsInTTDRegion &&
-          (WithSPMDCheck ||
-           getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
-        assert(I->getSecond().IsInSPMDModeFlag &&
-               "Expected unknown execution mode or required SPMD check.");
-        if (IsTTD) {
-          assert(SecIt->second.IsOnePerTeam &&
-                 "Secondary glob data must be one per team.");
-          LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
-          VarAddr.setAddress(
-              Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(CGF),
-                                       VarAddr.getPointer(CGF)),
-                      VarAddr.getAlignment()));
-          Rec.second.PrivateAddr = VarAddr.getAddress(CGF);
-        }
-        Address GlobalPtr = Rec.second.PrivateAddr;
-        Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
-        Rec.second.PrivateAddr = Address(
-            Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
-                             LocalAddr.getPointer(), GlobalPtr.getPointer()),
-            LocalAddr.getAlignment());
-      }
-      if (EscapedParam) {
-        const auto *VD = cast<VarDecl>(Rec.first);
-        CGF.EmitStoreOfScalar(ParValue, VarAddr);
-        I->getSecond().MappedParams->setVarAddr(CGF, VD,
-                                                VarAddr.getAddress(CGF));
-      }
-      if (IsTTD)
-        ++SecIt;
-    }
-  }
-  for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
-    // Recover pointer to this function's global record. The runtime will
-    // handle the specifics of the allocation of the memory.
-    // Use actual memory size of the record including the padding
-    // for alignment purposes.
-    CGBuilderTy &Bld = CGF.Builder;
-    llvm::Value *Size = CGF.getTypeSize(VD->getType());
-    CharUnits Align = CGM.getContext().getDeclAlign(VD);
-    Size = Bld.CreateNUWAdd(
-        Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));
-    llvm::Value *AlignVal =
-        llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity());
-    Size = Bld.CreateUDiv(Size, AlignVal);
-    Size = Bld.CreateNUWMul(Size, AlignVal);
-    // TODO: allow the usage of shared memory to be controlled by
-    // the user, for now, default to global.
-    llvm::Value *GlobalRecordSizeArg[] = {
-        Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
-    llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(
-            OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
-        GlobalRecordSizeArg);
-    llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
-    LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(),
-                                     CGM.getContext().getDeclAlign(VD),
-                                     AlignmentSource::Decl);
-    I->getSecond().MappedParams->setVarAddr(CGF, cast<const VarDecl>(VD),
-                                            Base.getAddress(CGF));
-    I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue);
-  }
-  I->getSecond().MappedParams->apply(CGF);
-}
-
-void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
-                                                 bool WithSPMDCheck) {
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
-      getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-
-  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
-  if (I != FunctionGlobalizedDecls.end()) {
-    I->getSecond().MappedParams->restore(CGF);
-    if (!CGF.HaveInsertPoint())
-      return;
-    for (llvm::Value *Addr :
-         llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
-      CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
-          Addr);
-    }
-    if (I->getSecond().GlobalRecordAddr) {
-      if (!IsInTTDRegion &&
-          (WithSPMDCheck ||
-           getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
-        CGBuilderTy &Bld = CGF.Builder;
-        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-        llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
-        Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
-        // There is no need to emit line number for unconditional branch.
-        (void)ApplyDebugLocation::CreateEmpty(CGF);
-        CGF.EmitBlock(NonSPMDBB);
-        CGF.EmitRuntimeCall(
-            createNVPTXRuntimeFunction(
-                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
-            CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
-        CGF.EmitBlock(ExitBB);
-      } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
-        assert(GlobalizedRecords.back().RegionCounter > 0 &&
-               "region counter must be > 0.");
-        --GlobalizedRecords.back().RegionCounter;
-        // Emit the restore function only in the target region.
-        if (GlobalizedRecords.back().RegionCounter == 0) {
-          QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
-              /*DestWidth=*/16, /*Signed=*/0);
-          llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
-              Address(GlobalizedRecords.back().UseSharedMemory,
-                      CGM.getContext().getTypeAlignInChars(Int16Ty)),
-              /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
-          llvm::Value *Args[] = {
-              llvm::ConstantInt::get(
-                  CGM.Int16Ty,
-                  getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
-              IsInSharedMemory};
-          CGF.EmitRuntimeCall(
-              createNVPTXRuntimeFunction(
-                  OMPRTL_NVPTX__kmpc_restore_team_static_memory),
-              Args);
-        }
-      } else {
-        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-                                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
-                            I->getSecond().GlobalRecordAddr);
-      }
-    }
-  }
-}
-
-void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
-                                         const OMPExecutableDirective &D,
-                                         SourceLocation Loc,
-                                         llvm::Function *OutlinedFn,
-                                         ArrayRef<llvm::Value *> CapturedVars) {
-  if (!CGF.HaveInsertPoint())
-    return;
-
-  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                                      /*Name=*/".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
-}
-
-void CGOpenMPRuntimeNVPTX::emitParallelCall(
-    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
-    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
-  if (!CGF.HaveInsertPoint())
-    return;
-
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
-  else
-    emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
-}
-
-void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
-    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
-    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
-  llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
-
-  // Force inline this outlined function at its call site.
-  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
-
-  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                                      /*Name=*/".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  // ThreadId for serialized parallels is 0.
-  Address ThreadIDAddr = ZeroAddr;
-  auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr](
-                       CodeGenFunction &CGF, PrePostActionTy &Action) {
-    Action.Enter(CGF);
-
-    Address ZeroAddr =
-        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                         /*Name=*/".bound.zero.addr");
-    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
-    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-    emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
-  };
-  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
-                                        PrePostActionTy &) {
-
-    RegionCodeGenTy RCG(CodeGen);
-    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
-    llvm::Value *ThreadID = getThreadID(CGF, Loc);
-    llvm::Value *Args[] = {RTLoc, ThreadID};
-
-    NVPTXActionTy Action(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
-        Args,
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
-        Args);
-    RCG.setAction(Action);
-    RCG(CGF);
-  };
-
-  auto &&L0ParallelGen = [this, CapturedVars, Fn](CodeGenFunction &CGF,
-                                                  PrePostActionTy &Action) {
-    CGBuilderTy &Bld = CGF.Builder;
-    llvm::Function *WFn = WrapperFunctionsMap[Fn];
-    assert(WFn && "Wrapper function does not exist!");
-    llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
-
-    // Prepare for parallel region. Indicate the outlined function.
-    llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)};
-    CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
-        Args);
-
-    // Create a private scope that will globalize the arguments
-    // passed from the outside of the target region.
-    CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
-
-    // There's something to share.
-    if (!CapturedVars.empty()) {
-      // Prepare for parallel region. Indicate the outlined function.
-      Address SharedArgs =
-          CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_arg_refs");
-      llvm::Value *SharedArgsPtr = SharedArgs.getPointer();
-
-      llvm::Value *DataSharingArgs[] = {
-          SharedArgsPtr,
-          llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
-      CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-                              OMPRTL_NVPTX__kmpc_begin_sharing_variables),
-                          DataSharingArgs);
-
-      // Store variable address in a list of references to pass to workers.
-      unsigned Idx = 0;
-      ASTContext &Ctx = CGF.getContext();
-      Address SharedArgListAddress = CGF.EmitLoadOfPointer(
-          SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy))
-                          .castAs());
-      for (llvm::Value *V : CapturedVars) {
-        Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
-        llvm::Value *PtrV;
-        if (V->getType()->isIntegerTy())
-          PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
-        else
-          PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
-        CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
-                              Ctx.getPointerType(Ctx.VoidPtrTy));
-        ++Idx;
-      }
-    }
-
-    // Activate workers. This barrier is used by the master to signal
-    // work for the workers.
-    syncCTAThreads(CGF);
-
-    // OpenMP [2.5, Parallel Construct, p.49]
-    // There is an implied barrier at the end of a parallel region. After the
-    // end of a parallel region, only the master thread of the team resumes
-    // execution of the enclosing task region.
-    //
-    // The master waits at this barrier until all workers are done.
-    syncCTAThreads(CGF);
-
-    if (!CapturedVars.empty())
-      CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
-
-    // Remember for post-processing in worker loop.
-    Work.emplace_back(WFn);
-  };
-
-  auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen](
-                             CodeGenFunction &CGF, PrePostActionTy &Action) {
-    if (IsInParallelRegion) {
-      SeqGen(CGF, Action);
-    } else if (IsInTargetMasterThreadRegion) {
-      L0ParallelGen(CGF, Action);
-    } else {
-      // Check for master and then parallelism:
-      // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) {
-      //   Serialized execution.
-      // } else {
-      //   Worker call.
-      // }
-      CGBuilderTy &Bld = CGF.Builder;
-      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-      llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
-      llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
-      llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
-      llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
-      Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
-      // There is no need to emit line number for unconditional branch.
-      (void)ApplyDebugLocation::CreateEmpty(CGF);
-      CGF.EmitBlock(ParallelCheckBB);
-      llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
-      llvm::Value *ThreadID = getThreadID(CGF, Loc);
-      llvm::Value *PL = CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
-          {RTLoc, ThreadID});
-      llvm::Value *Res = Bld.CreateIsNotNull(PL);
-      Bld.CreateCondBr(Res, SeqBB, MasterBB);
-      CGF.EmitBlock(SeqBB);
-      SeqGen(CGF, Action);
-      CGF.EmitBranch(ExitBB);
-      // There is no need to emit line number for unconditional branch.
-      (void)ApplyDebugLocation::CreateEmpty(CGF);
-      CGF.EmitBlock(MasterBB);
-      L0ParallelGen(CGF, Action);
-      CGF.EmitBranch(ExitBB);
-      // There is no need to emit line number for unconditional branch.
-      (void)ApplyDebugLocation::CreateEmpty(CGF);
-      // Emit the continuation block for code after the if.
-      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
-    }
-  };
-
-  if (IfCond) {
-    emitIfClause(CGF, IfCond, LNParallelGen, SeqGen);
-  } else {
-    CodeGenFunction::RunCleanupsScope Scope(CGF);
-    RegionCodeGenTy ThenRCG(LNParallelGen);
-    ThenRCG(CGF);
-  }
-}
-
-void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
-    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
-    ArrayRef CapturedVars, const Expr *IfCond) {
-  // Just call the outlined function to execute the parallel region.
-  // OutlinedFn(>id, &zero, CapturedStruct);
-  //
-  llvm::SmallVector OutlinedFnArgs;
-
-  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                                      /*Name=*/".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  // ThreadId for serialized parallels is 0.
-  Address ThreadIDAddr = ZeroAddr;
-  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr](
-                       CodeGenFunction &CGF, PrePostActionTy &Action) {
-    Action.Enter(CGF);
-
-    Address ZeroAddr =
-        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                         /*Name=*/".bound.zero.addr");
-    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-    llvm::SmallVector OutlinedFnArgs;
-    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
-    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-    emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
-  };
-  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
-                                        PrePostActionTy &) {
-
-    RegionCodeGenTy RCG(CodeGen);
-    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
-    llvm::Value *ThreadID = getThreadID(CGF, Loc);
-    llvm::Value *Args[] = {RTLoc, ThreadID};
-
-    NVPTXActionTy Action(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
-        Args,
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
-        Args);
-    RCG.setAction(Action);
-    RCG(CGF);
-  };
-
-  if (IsInTargetMasterThreadRegion) {
-    // In the worker need to use the real thread id.
-    ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
-    RegionCodeGenTy RCG(CodeGen);
-    RCG(CGF);
-  } else {
-    // If we are not in the target region, it is definitely L2 parallelism or
-    // more, because for SPMD mode we always has L1 parallel level, sowe don't
-    // need to check for orphaned directives.
-    RegionCodeGenTy RCG(SeqGen);
-    RCG(CGF);
-  }
-}
-
-void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
-  // Always emit simple barriers!
-  if (!CGF.HaveInsertPoint())
-    return;
-  // Build call __kmpc_barrier_simple_spmd(nullptr, 0);
-  // This function does not use parameters, so we can emit just default values.
-  llvm::Value *Args[] = {
-      llvm::ConstantPointerNull::get(
-          cast(getIdentTyPointerTy())),
-      llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
-  llvm::CallInst *Call = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
-  Call->setConvergent();
-}
-
-void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
-                                           SourceLocation Loc,
-                                           OpenMPDirectiveKind Kind, bool,
-                                           bool) {
-  // Always emit simple barriers!
-  if (!CGF.HaveInsertPoint())
-    return;
-  // Build call __kmpc_cancel_barrier(loc, thread_id);
-  unsigned Flags = getDefaultFlagsForBarriers(Kind);
-  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
-                         getThreadID(CGF, Loc)};
-  llvm::CallInst *Call = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
-  Call->setConvergent();
-}
-
-void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
-    CodeGenFunction &CGF, StringRef CriticalName,
-    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
-    const Expr *Hint) {
-  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");
-  llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");
-  llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");
-  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
-  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
-
-  // Get the mask of active threads in the warp.
-  llvm::Value *Mask = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
-  // Fetch team-local id of the thread.
-  llvm::Value *ThreadID = getNVPTXThreadID(CGF);
-
-  // Get the width of the team.
-  llvm::Value *TeamWidth = getNVPTXNumThreads(CGF);
-
-  // Initialize the counter variable for the loop.
-  QualType Int32Ty =
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0);
-  Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter");
-  LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty);
-  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,
-                        /*isInit=*/true);
-
-  // Block checks if loop counter exceeds upper bound.
-  CGF.EmitBlock(LoopBB);
-  llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
-  llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
-  CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
-
-  // Block tests which single thread should execute region, and which threads
-  // should go straight to synchronisation point.
-  CGF.EmitBlock(TestBB);
-  CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);
-  llvm::Value *CmpThreadToCounter =
-      CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
-  CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
-
-  // Block emits the body of the critical region.
-  CGF.EmitBlock(BodyBB);
-
-  // Output the critical statement.
-  CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,
-                                      Hint);
-
-  // After the body surrounded by the critical region, the single executing
-  // thread will jump to the synchronisation point.
-  // Block waits for all threads in current team to finish then increments the
-  // counter variable and returns to the loop.
-  CGF.EmitBlock(SyncBB);
-  // Reconverge active threads in the warp.
-  (void)CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
-
-  llvm::Value *IncCounterVal =
-      CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
-  CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal);
-  CGF.EmitBranch(LoopBB);
-
-  // Block that is reached when  all threads in the team complete the region.
-  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
-}
-
-/// Cast value to the specified type.
-static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
-                                    QualType ValTy, QualType CastTy,
-                                    SourceLocation Loc) {
-  assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() &&
-         "Cast type must sized.");
-  assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() &&
-         "Val type must sized.");
-  llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy);
-  if (ValTy == CastTy)
-    return Val;
-  if (CGF.getContext().getTypeSizeInChars(ValTy) ==
-      CGF.getContext().getTypeSizeInChars(CastTy))
-    return CGF.Builder.CreateBitCast(Val, LLVMCastTy);
-  if (CastTy->isIntegerType() && ValTy->isIntegerType())
-    return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
-                                     CastTy->hasSignedIntegerRepresentation());
-  Address CastItem = CGF.CreateMemTemp(CastTy);
-  Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
-  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy);
-  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc);
-}
-
-/// This function creates calls to one of two shuffle functions to copy
-/// variables between lanes in a warp.
-static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
-                                                 llvm::Value *Elem,
-                                                 QualType ElemType,
-                                                 llvm::Value *Offset,
-                                                 SourceLocation Loc) {
-  CodeGenModule &CGM = CGF.CGM;
-  CGBuilderTy &Bld = CGF.Builder;
-  CGOpenMPRuntimeNVPTX &RT =
-      *(static_cast(&CGM.getOpenMPRuntime()));
-
-  CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
-  assert(Size.getQuantity() <= 8 &&
-         "Unsupported bitwidth in shuffle instruction.");
-
-  OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
-                                         ? OMPRTL_NVPTX__kmpc_shuffle_int32
-                                         : OMPRTL_NVPTX__kmpc_shuffle_int64;
-
-  // Cast all types to 32- or 64-bit values before calling shuffle routines.
-  QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
-      Size.getQuantity() <= 4 ? 32 : 64, /*Signed=*/1);
-  llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc);
-  llvm::Value *WarpSize =
-      Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
-
-  llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
-      RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
-
-  return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
-}
-
-static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
-                            Address DestAddr, QualType ElemType,
-                            llvm::Value *Offset, SourceLocation Loc) {
-  CGBuilderTy &Bld = CGF.Builder;
-
-  CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
-  // Create the loop over the big sized data.
-  // ptr = (void*)Elem;
-  // ptrEnd = (void*) Elem + 1;
-  // Step = 8;
-  // while (ptr + Step < ptrEnd)
-  //   shuffle((int64_t)*ptr);
-  // Step = 4;
-  // while (ptr + Step < ptrEnd)
-  //   shuffle((int32_t)*ptr);
-  // ...
-  Address ElemPtr = DestAddr;
-  Address Ptr = SrcAddr;
-  Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      Bld.CreateConstGEP(SrcAddr, 1), CGF.VoidPtrTy);
-  for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
-    if (Size < CharUnits::fromQuantity(IntSize))
-      continue;
-    QualType IntType = CGF.getContext().getIntTypeForBitwidth(
-        CGF.getContext().toBits(CharUnits::fromQuantity(IntSize)),
-        /*Signed=*/1);
-    llvm::Type *IntTy = CGF.ConvertTypeForMem(IntType);
-    Ptr = Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr, IntTy->getPointerTo());
-    ElemPtr =
-        Bld.CreatePointerBitCastOrAddrSpaceCast(ElemPtr, IntTy->getPointerTo());
-    if (Size.getQuantity() / IntSize > 1) {
-      llvm::BasicBlock *PreCondBB = CGF.createBasicBlock(".shuffle.pre_cond");
-      llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".shuffle.then");
-      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".shuffle.exit");
-      llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
-      CGF.EmitBlock(PreCondBB);
-      llvm::PHINode *PhiSrc =
-          Bld.CreatePHI(Ptr.getType(), /*NumReservedValues=*/2);
-      PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB);
-      llvm::PHINode *PhiDest =
-          Bld.CreatePHI(ElemPtr.getType(), /*NumReservedValues=*/2);
-      PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB);
-      Ptr = Address(PhiSrc, Ptr.getAlignment());
-      ElemPtr = Address(PhiDest, ElemPtr.getAlignment());
-      llvm::Value *PtrDiff = Bld.CreatePtrDiff(
-          PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast(
-                                   Ptr.getPointer(), CGF.VoidPtrTy));
-      Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
-                       ThenBB, ExitBB);
-      CGF.EmitBlock(ThenBB);
-      llvm::Value *Res = createRuntimeShuffleFunction(
-          CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
-          IntType, Offset, Loc);
-      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
-      Address LocalPtr = Bld.CreateConstGEP(Ptr, 1);
-      Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
-      PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
-      PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
-      CGF.EmitBranch(PreCondBB);
-      CGF.EmitBlock(ExitBB);
-    } else {
-      llvm::Value *Res = createRuntimeShuffleFunction(
-          CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
-          IntType, Offset, Loc);
-      CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
-      Ptr = Bld.CreateConstGEP(Ptr, 1);
-      ElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
-    }
-    Size = Size % IntSize;
-  }
-}
-
-namespace {
-enum CopyAction : unsigned {
-  // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
-  // the warp using shuffle instructions.
-  RemoteLaneToThread,
-  // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
-  ThreadCopy,
-  // ThreadToScratchpad: Copy a team-reduced array to the scratchpad.
-  ThreadToScratchpad,
-  // ScratchpadToThread: Copy from a scratchpad array in global memory
-  // containing team-reduced data to a thread's stack.
-  ScratchpadToThread,
-};
-} // namespace
-
-struct CopyOptionsTy {
-  llvm::Value *RemoteLaneOffset;
-  llvm::Value *ScratchpadIndex;
-  llvm::Value *ScratchpadWidth;
-};
-
-/// Emit instructions to copy a Reduce list, which contains partially
-/// aggregated values, in the specified direction.
-static void emitReductionListCopy(
-    CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy,
-    ArrayRef Privates, Address SrcBase, Address DestBase,
-    CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) {
-
-  CodeGenModule &CGM = CGF.CGM;
-  ASTContext &C = CGM.getContext();
-  CGBuilderTy &Bld = CGF.Builder;
-
-  llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
-  llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
-  llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
-
-  // Iterates, element-by-element, through the source Reduce list and
-  // make a copy.
-  unsigned Idx = 0;
-  unsigned Size = Privates.size();
-  for (const Expr *Private : Privates) {
-    Address SrcElementAddr = Address::invalid();
-    Address DestElementAddr = Address::invalid();
-    Address DestElementPtrAddr = Address::invalid();
-    // Should we shuffle in an element from a remote lane?
-    bool ShuffleInElement = false;
-    // Set to true to update the pointer in the dest Reduce list to a
-    // newly created element.
-    bool UpdateDestListPtr = false;
-    // Increment the src or dest pointer to the scratchpad, for each
-    // new element.
-    bool IncrScratchpadSrc = false;
-    bool IncrScratchpadDest = false;
-
-    switch (Action) {
-    case RemoteLaneToThread: {
-      // Step 1.1: Get the address for the src element in the Reduce list.
-      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
-      SrcElementAddr = CGF.EmitLoadOfPointer(
-          SrcElementPtrAddr,
-          C.getPointerType(Private->getType())->castAs());
-
-      // Step 1.2: Create a temporary to store the element in the destination
-      // Reduce list.
-      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
-      DestElementAddr =
-          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
-      ShuffleInElement = true;
-      UpdateDestListPtr = true;
-      break;
-    }
-    case ThreadCopy: {
-      // Step 1.1: Get the address for the src element in the Reduce list.
-      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
-      SrcElementAddr = CGF.EmitLoadOfPointer(
-          SrcElementPtrAddr,
-          C.getPointerType(Private->getType())->castAs());
-
-      // Step 1.2: Get the address for dest element.  The destination
-      // element has already been created on the thread's stack.
-      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
-      DestElementAddr = CGF.EmitLoadOfPointer(
-          DestElementPtrAddr,
-          C.getPointerType(Private->getType())->castAs());
-      break;
-    }
-    case ThreadToScratchpad: {
-      // Step 1.1: Get the address for the src element in the Reduce list.
-      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
-      SrcElementAddr = CGF.EmitLoadOfPointer(
-          SrcElementPtrAddr,
-          C.getPointerType(Private->getType())->castAs());
-
-      // Step 1.2: Get the address for dest element:
-      // address = base + index * ElementSizeInChars.
-      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
-      llvm::Value *CurrentOffset =
-          Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
-      llvm::Value *ScratchPadElemAbsolutePtrVal =
-          Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
-      ScratchPadElemAbsolutePtrVal =
-          Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
-      DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
-                                C.getTypeAlignInChars(Private->getType()));
-      IncrScratchpadDest = true;
-      break;
-    }
-    case ScratchpadToThread: {
-      // Step 1.1: Get the address for the src element in the scratchpad.
-      // address = base + index * ElementSizeInChars.
-      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
-      llvm::Value *CurrentOffset =
-          Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
-      llvm::Value *ScratchPadElemAbsolutePtrVal =
-          Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
-      ScratchPadElemAbsolutePtrVal =
-          Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
-      SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
-                               C.getTypeAlignInChars(Private->getType()));
-      IncrScratchpadSrc = true;
-
-      // Step 1.2: Create a temporary to store the element in the destination
-      // Reduce list.
-      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
-      DestElementAddr =
-          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
-      UpdateDestListPtr = true;
-      break;
-    }
-    }
-
-    // Regardless of src and dest of copy, we emit the load of src
-    // element as this is required in all directions
-    SrcElementAddr = Bld.CreateElementBitCast(
-        SrcElementAddr, CGF.ConvertTypeForMem(Private->getType()));
-    DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
-                                               SrcElementAddr.getElementType());
-
-    // Now that all active lanes have read the element in the
-    // Reduce list, shuffle over the value from the remote lane.
-    if (ShuffleInElement) {
-      shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(),
-                      RemoteLaneOffset, Private->getExprLoc());
-    } else {
-      switch (CGF.getEvaluationKind(Private->getType())) {
-      case TEK_Scalar: {
-        llvm::Value *Elem =
-            CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
-                                 Private->getType(), Private->getExprLoc());
-        // Store the source element value to the dest element address.
-        CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
-                              Private->getType());
-        break;
-      }
-      case TEK_Complex: {
-        CodeGenFunction::ComplexPairTy Elem = CGF.EmitLoadOfComplex(
-            CGF.MakeAddrLValue(SrcElementAddr, Private->getType()),
-            Private->getExprLoc());
-        CGF.EmitStoreOfComplex(
-            Elem, CGF.MakeAddrLValue(DestElementAddr, Private->getType()),
-            /*isInit=*/false);
-        break;
-      }
-      case TEK_Aggregate:
-        CGF.EmitAggregateCopy(
-            CGF.MakeAddrLValue(DestElementAddr, Private->getType()),
-            CGF.MakeAddrLValue(SrcElementAddr, Private->getType()),
-            Private->getType(), AggValueSlot::DoesNotOverlap);
-        break;
-      }
-    }
-
-    // Step 3.1: Modify reference in dest Reduce list as needed.
-    // Modifying the reference in Reduce list to point to the newly
-    // created element.  The element is live in the current function
-    // scope and that of functions it invokes (i.e., reduce_function).
-    // RemoteReduceData[i] = (void*)&RemoteElem
-    if (UpdateDestListPtr) {
-      CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast(
-                                DestElementAddr.getPointer(), CGF.VoidPtrTy),
-                            DestElementPtrAddr, /*Volatile=*/false,
-                            C.VoidPtrTy);
-    }
-
-    // Step 4.1: Increment SrcBase/DestBase so that it points to the starting
-    // address of the next element in scratchpad memory, unless we're currently
-    // processing the last one.  Memory alignment is also taken care of here.
-    if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
-      llvm::Value *ScratchpadBasePtr =
-          IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
-      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
-      ScratchpadBasePtr = Bld.CreateNUWAdd(
-          ScratchpadBasePtr,
-          Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
-
-      // Take care of global memory alignment for performance
-      ScratchpadBasePtr = Bld.CreateNUWSub(
-          ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
-      ScratchpadBasePtr = Bld.CreateUDiv(
-          ScratchpadBasePtr,
-          llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
-      ScratchpadBasePtr = Bld.CreateNUWAdd(
-          ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
-      ScratchpadBasePtr = Bld.CreateNUWMul(
-          ScratchpadBasePtr,
-          llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
-
-      if (IncrScratchpadDest)
-        DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
-      else /* IncrScratchpadSrc = true */
-        SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
-    }
-
-    ++Idx;
-  }
-}
-
-/// This function emits a helper that gathers Reduce lists from the first
-/// lane of every active warp to lanes in the first warp.
-///
-/// void inter_warp_copy_func(void* reduce_data, num_warps)
-///   shared smem[warp_size];
-///   For all data entries D in reduce_data:
-///     sync
-///     If (I am the first lane in each warp)
-///       Copy my local D to smem[warp_id]
-///     sync
-///     if (I am the first warp)
-///       Copy smem[thread_id] to my local D
-static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
-                                              ArrayRef Privates,
-                                              QualType ReductionArrayTy,
-                                              SourceLocation Loc) {
-  ASTContext &C = CGM.getContext();
-  llvm::Module &M = CGM.getModule();
-
-  // ReduceList: thread local Reduce list.
-  // At the stage of the computation when this function is called, partially
-  // aggregated values reside in the first lane of every active warp.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                  C.VoidPtrTy, ImplicitParamDecl::Other);
-  // NumWarps: number of warps active in the parallel region.  This could
-  // be smaller than 32 (max warps in a CTA) for partial block reduction.
-  ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                C.getIntTypeForBitwidth(32, /* Signed */ true),
-                                ImplicitParamDecl::Other);
-  FunctionArgList Args;
-  Args.push_back(&ReduceListArg);
-  Args.push_back(&NumWarpsArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
-  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
-                                    llvm::GlobalValue::InternalLinkage,
-                                    "_omp_reduction_inter_warp_copy_func", &M);
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setDoesNotRecurse();
-  CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  // This array is used as a medium to transfer, one reduce element at a time,
-  // the data from the first lane of every warp to lanes in the first warp
-  // in order to perform the final step of a reduction in a parallel region
-  // (reduction across warps).  The array is placed in NVPTX __shared__ memory
-  // for reduced latency, as well as to have a distinct copy for concurrently
-  // executing target regions.  The array is declared with common linkage so
-  // as to be shared across compilation units.
-  StringRef TransferMediumName =
-      "__openmp_nvptx_data_transfer_temporary_storage";
-  llvm::GlobalVariable *TransferMedium =
-      M.getGlobalVariable(TransferMediumName);
-  if (!TransferMedium) {
-    auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
-    unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
-    TransferMedium = new llvm::GlobalVariable(
-        M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
-        llvm::Constant::getNullValue(Ty), TransferMediumName,
-        /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
-        SharedAddressSpace);
-    CGM.addCompilerUsedGlobal(TransferMedium);
-  }
-
-  // Get the CUDA thread id of the current OpenMP thread on the GPU.
-  llvm::Value *ThreadID = getNVPTXThreadID(CGF);
-  // nvptx_lane_id = nvptx_id % warpsize
-  llvm::Value *LaneID = getNVPTXLaneID(CGF);
-  // nvptx_warp_id = nvptx_id / warpsize
-  llvm::Value *WarpID = getNVPTXWarpID(CGF);
-
-  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
-  Address LocalReduceList(
-      Bld.CreatePointerBitCastOrAddrSpaceCast(
-          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, Loc),
-          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
-      CGF.getPointerAlign());
-
-  unsigned Idx = 0;
-  for (const Expr *Private : Privates) {
-    //
-    // Warp master copies reduce element to transfer medium in __shared__
-    // memory.
-    //
-    unsigned RealTySize =
-        C.getTypeSizeInChars(Private->getType())
-            .alignTo(C.getTypeAlignInChars(Private->getType()))
-            .getQuantity();
-    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) {
-      unsigned NumIters = RealTySize / TySize;
-      if (NumIters == 0)
-        continue;
-      QualType CType = C.getIntTypeForBitwidth(
-          C.toBits(CharUnits::fromQuantity(TySize)), /*Signed=*/1);
-      llvm::Type *CopyType = CGF.ConvertTypeForMem(CType);
-      CharUnits Align = CharUnits::fromQuantity(TySize);
-      llvm::Value *Cnt = nullptr;
-      Address CntAddr = Address::invalid();
-      llvm::BasicBlock *PrecondBB = nullptr;
-      llvm::BasicBlock *ExitBB = nullptr;
-      if (NumIters > 1) {
-        CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr");
-        CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr,
-                              /*Volatile=*/false, C.IntTy);
-        PrecondBB = CGF.createBasicBlock("precond");
-        ExitBB = CGF.createBasicBlock("exit");
-        llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body");
-        // There is no need to emit line number for unconditional branch.
-        (void)ApplyDebugLocation::CreateEmpty(CGF);
-        CGF.EmitBlock(PrecondBB);
-        Cnt = CGF.EmitLoadOfScalar(CntAddr, /*Volatile=*/false, C.IntTy, Loc);
-        llvm::Value *Cmp =
-            Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters));
-        Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
-        CGF.EmitBlock(BodyBB);
-      }
-      // kmpc_barrier.
-      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
-                                             /*EmitChecks=*/false,
-                                             /*ForceSimpleCall=*/true);
-      llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
-      llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
-      llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
-
-      // if (lane_id == 0)
-      llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
-      Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
-      CGF.EmitBlock(ThenBB);
-
-      // Reduce element = LocalReduceList[i]
-      Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
-      llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
-          ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
-      // elemptr = ((CopyType*)(elemptrptr)) + I
-      Address ElemPtr = Address(ElemPtrPtr, Align);
-      ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType);
-      if (NumIters > 1) {
-        ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt),
-                          ElemPtr.getAlignment());
-      }
-
-      // Get pointer to location in transfer medium.
-      // MediumPtr = &medium[warp_id]
-      llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
-          TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
-      Address MediumPtr(MediumPtrVal, Align);
-      // Casting to actual data type.
-      // MediumPtr = (CopyType*)MediumPtrAddr;
-      MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType);
-
-      // elem = *elemptr
-      //*MediumPtr = elem
-      llvm::Value *Elem =
-          CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc);
-      // Store the source element value to the dest element address.
-      CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType);
-
-      Bld.CreateBr(MergeBB);
-
-      CGF.EmitBlock(ElseBB);
-      Bld.CreateBr(MergeBB);
-
-      CGF.EmitBlock(MergeBB);
-
-      // kmpc_barrier.
-      CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
-                                             /*EmitChecks=*/false,
-                                             /*ForceSimpleCall=*/true);
-
-      //
-      // Warp 0 copies reduce element from transfer medium.
-      //
-      llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
-      llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
-      llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
-
-      Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
-      llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
-          AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc);
-
-      // Up to 32 threads in warp 0 are active.
-      llvm::Value *IsActiveThread =
-          Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
-      Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
-
-      CGF.EmitBlock(W0ThenBB);
-
-      // SrcMediumPtr = &medium[tid]
-      llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
-          TransferMedium,
-          {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
-      Address SrcMediumPtr(SrcMediumPtrVal, Align);
-      // SrcMediumVal = *SrcMediumPtr;
-      SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType);
-
-      // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I
-      Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
-      llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
-          TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc);
-      Address TargetElemPtr = Address(TargetElemPtrVal, Align);
-      TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType);
-      if (NumIters > 1) {
-        TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt),
-                                TargetElemPtr.getAlignment());
-      }
-
-      // *TargetElemPtr = SrcMediumVal;
-      llvm::Value *SrcMediumValue =
-          CGF.EmitLoadOfScalar(SrcMediumPtr, /*Volatile=*/true, CType, Loc);
-      CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
-                            CType);
-      Bld.CreateBr(W0MergeBB);
-
-      CGF.EmitBlock(W0ElseBB);
-      Bld.CreateBr(W0MergeBB);
-
-      CGF.EmitBlock(W0MergeBB);
-
-      if (NumIters > 1) {
-        Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1));
-        CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy);
-        CGF.EmitBranch(PrecondBB);
-        (void)ApplyDebugLocation::CreateEmpty(CGF);
-        CGF.EmitBlock(ExitBB);
-      }
-      RealTySize %= TySize;
-    }
-    ++Idx;
-  }
-
-  CGF.FinishFunction();
-  return Fn;
-}
-
-/// Emit a helper that reduces data across two OpenMP threads (lanes)
-/// in the same warp.  It uses shuffle instructions to copy over data from
-/// a remote lane's stack.  The reduction algorithm performed is specified
-/// by the fourth parameter.
-///
-/// Algorithm Versions.
-/// Full Warp Reduce (argument value 0):
-///   This algorithm assumes that all 32 lanes are active and gathers
-///   data from these 32 lanes, producing a single resultant value.
-/// Contiguous Partial Warp Reduce (argument value 1):
-///   This algorithm assumes that only a *contiguous* subset of lanes
-///   are active.  This happens for the last warp in a parallel region
-///   when the user specified num_threads is not an integer multiple of
-///   32.  This contiguous subset always starts with the zeroth lane.
-/// Partial Warp Reduce (argument value 2):
-///   This algorithm gathers data from any number of lanes at any position.
-/// All reduced values are stored in the lowest possible lane.  The set
-/// of problems every algorithm addresses is a super set of those
-/// addressable by algorithms with a lower version number.  Overhead
-/// increases as algorithm version increases.
-///
-/// Terminology
-/// Reduce element:
-///   Reduce element refers to the individual data field with primitive
-///   data types to be combined and reduced across threads.
-/// Reduce list:
-///   Reduce list refers to a collection of local, thread-private
-///   reduce elements.
-/// Remote Reduce list:
-///   Remote Reduce list refers to a collection of remote (relative to
-///   the current thread) reduce elements.
-///
-/// We distinguish between three states of threads that are important to
-/// the implementation of this function.
-/// Alive threads:
-///   Threads in a warp executing the SIMT instruction, as distinguished from
-///   threads that are inactive due to divergent control flow.
-/// Active threads:
-///   The minimal set of threads that has to be alive upon entry to this
-///   function.  The computation is correct iff active threads are alive.
-///   Some threads are alive but they are not active because they do not
-///   contribute to the computation in any useful manner.  Turning them off
-///   may introduce control flow overheads without any tangible benefits.
-/// Effective threads:
-///   In order to comply with the argument requirements of the shuffle
-///   function, we must keep all lanes holding data alive.  But at most
-///   half of them perform value aggregation; we refer to this half of
-///   threads as effective. The other half is simply handing off their
-///   data.
-///
-/// Procedure
-/// Value shuffle:
-///   In this step active threads transfer data from higher lane positions
-///   in the warp to lower lane positions, creating Remote Reduce list.
-/// Value aggregation:
-///   In this step, effective threads combine their thread local Reduce list
-///   with Remote Reduce list and store the result in the thread local
-///   Reduce list.
-/// Value copy:
-///   In this step, we deal with the assumption made by algorithm 2
-///   (i.e. contiguity assumption).  When we have an odd number of lanes
-///   active, say 2k+1, only k threads will be effective and therefore k
-///   new values will be produced.  However, the Reduce list owned by the
-///   (2k+1)th thread is ignored in the value aggregation.  Therefore
-///   we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
-///   that the contiguity assumption still holds.
-static llvm::Function *emitShuffleAndReduceFunction(
-    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
-    QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc) {
-  ASTContext &C = CGM.getContext();
-
-  // Thread local Reduce list used to host the values of data to be reduced.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                  C.VoidPtrTy, ImplicitParamDecl::Other);
-  // Current lane id; could be logical.
-  ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy,
-                              ImplicitParamDecl::Other);
-  // Offset of the remote source lane relative to the current lane.
-  ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                        C.ShortTy, ImplicitParamDecl::Other);
-  // Algorithm version.  This is expected to be known at compile time.
-  ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                               C.ShortTy, ImplicitParamDecl::Other);
-  FunctionArgList Args;
-  Args.push_back(&ReduceListArg);
-  Args.push_back(&LaneIDArg);
-  Args.push_back(&RemoteLaneOffsetArg);
-  Args.push_back(&AlgoVerArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
-  auto *Fn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setDoesNotRecurse();
-  if (CGM.getLangOpts().Optimize) {
-    Fn->removeFnAttr(llvm::Attribute::NoInline);
-    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
-    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
-  }
-
-  CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
-  Address LocalReduceList(
-      Bld.CreatePointerBitCastOrAddrSpaceCast(
-          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, SourceLocation()),
-          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
-      CGF.getPointerAlign());
-
-  Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg);
-  llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar(
-      AddrLaneIDArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
-
-  Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg);
-  llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar(
-      AddrRemoteLaneOffsetArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
-
-  Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg);
-  llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar(
-      AddrAlgoVerArg, /*Volatile=*/false, C.ShortTy, SourceLocation());
-
-  // Create a local thread-private variable to host the Reduce list
-  // from a remote lane.
-  Address RemoteReduceList =
-      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list");
-
-  // This loop iterates through the list of reduce elements and copies,
-  // element by element, from a remote lane in the warp to RemoteReduceList,
-  // hosted on the thread's stack.
-  emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates,
-                        LocalReduceList, RemoteReduceList,
-                        {/*RemoteLaneOffset=*/RemoteLaneOffsetArgVal,
-                         /*ScratchpadIndex=*/nullptr,
-                         /*ScratchpadWidth=*/nullptr});
-
-  // The actions to be performed on the Remote Reduce list is dependent
-  // on the algorithm version.
-  //
-  //  if (AlgoVer==0) || (AlgoVer==1 && (LaneId < Offset)) || (AlgoVer==2 &&
-  //  LaneId % 2 == 0 && Offset > 0):
-  //    do the reduction value aggregation
-  //
-  //  The thread local variable Reduce list is mutated in place to host the
-  //  reduced data, which is the aggregated value produced from local and
-  //  remote lanes.
-  //
-  //  Note that AlgoVer is expected to be a constant integer known at compile
-  //  time.
-  //  When AlgoVer==0, the first conjunction evaluates to true, making
-  //    the entire predicate true during compile time.
-  //  When AlgoVer==1, the second conjunction has only the second part to be
-  //    evaluated during runtime.  Other conjunctions evaluates to false
-  //    during compile time.
-  //  When AlgoVer==2, the third conjunction has only the second part to be
-  //    evaluated during runtime.  Other conjunctions evaluates to false
-  //    during compile time.
-  llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
-
-  llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
-  llvm::Value *CondAlgo1 = Bld.CreateAnd(
-      Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
-
-  llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
-  llvm::Value *CondAlgo2 = Bld.CreateAnd(
-      Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
-  CondAlgo2 = Bld.CreateAnd(
-      CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
-
-  llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
-  CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
-
-  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
-  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
-  llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
-  Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
-
-  CGF.EmitBlock(ThenBB);
-  // reduce_function(LocalReduceList, RemoteReduceList)
-  llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      LocalReduceList.getPointer(), CGF.VoidPtrTy);
-  llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      RemoteReduceList.getPointer(), CGF.VoidPtrTy);
-  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
-      CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
-  Bld.CreateBr(MergeBB);
-
-  CGF.EmitBlock(ElseBB);
-  Bld.CreateBr(MergeBB);
-
-  CGF.EmitBlock(MergeBB);
-
-  // if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
-  // Reduce list.
-  Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
-  llvm::Value *CondCopy = Bld.CreateAnd(
-      Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
-
-  llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then");
-  llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else");
-  llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont");
-  Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
-
-  CGF.EmitBlock(CpyThenBB);
-  emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
-                        RemoteReduceList, LocalReduceList);
-  Bld.CreateBr(CpyMergeBB);
-
-  CGF.EmitBlock(CpyElseBB);
-  Bld.CreateBr(CpyMergeBB);
-
-  CGF.EmitBlock(CpyMergeBB);
-
-  CGF.FinishFunction();
-  return Fn;
-}
-
-/// This function emits a helper that copies all the reduction variables from
-/// the team into the provided global buffer for the reduction variables.
-///
-/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
-///   For all data entries D in reduce_data:
-///     Copy local D to buffer.D[Idx]
-static llvm::Value *emitListToGlobalCopyFunction(
-    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
-    QualType ReductionArrayTy, SourceLocation Loc,
-    const RecordDecl *TeamReductionRec,
-    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-        &VarFieldMap) {
-  ASTContext &C = CGM.getContext();
-
-  // Buffer: global reduction buffer.
-  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                              C.VoidPtrTy, ImplicitParamDecl::Other);
-  // Idx: index of the buffer.
-  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
-                           ImplicitParamDecl::Other);
-  // ReduceList: thread local Reduce list.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                  C.VoidPtrTy, ImplicitParamDecl::Other);
-  FunctionArgList Args;
-  Args.push_back(&BufferArg);
-  Args.push_back(&IdxArg);
-  Args.push_back(&ReduceListArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
-  auto *Fn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      "_omp_reduction_list_to_global_copy_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setDoesNotRecurse();
-  CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
-  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
-  Address LocalReduceList(
-      Bld.CreatePointerBitCastOrAddrSpaceCast(
-          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, Loc),
-          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
-      CGF.getPointerAlign());
-  QualType StaticTy = C.getRecordType(TeamReductionRec);
-  llvm::Type *LLVMReductionsBufferTy =
-      CGM.getTypes().ConvertTypeForMem(StaticTy);
-  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
-      LLVMReductionsBufferTy->getPointerTo());
-  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
-                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
-                                              /*Volatile=*/false, C.IntTy,
-                                              Loc)};
-  unsigned Idx = 0;
-  for (const Expr *Private : Privates) {
-    // Reduce element = LocalReduceList[i]
-    Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
-    llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
-        ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
-    // elemptr = ((CopyType*)(elemptrptr)) + I
-    ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo());
-    Address ElemPtr =
-        Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
-    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
-    // Global = Buffer.VD[Idx];
-    const FieldDecl *FD = VarFieldMap.lookup(VD);
-    LValue GlobLVal = CGF.EmitLValueForField(
-        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
-    llvm::Value *BufferPtr =
-        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
-    GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment()));
-    switch (CGF.getEvaluationKind(Private->getType())) {
-    case TEK_Scalar: {
-      llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
-                                            Private->getType(), Loc);
-      CGF.EmitStoreOfScalar(V, GlobLVal);
-      break;
-    }
-    case TEK_Complex: {
-      CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(
-          CGF.MakeAddrLValue(ElemPtr, Private->getType()), Loc);
-      CGF.EmitStoreOfComplex(V, GlobLVal, /*isInit=*/false);
-      break;
-    }
-    case TEK_Aggregate:
-      CGF.EmitAggregateCopy(GlobLVal,
-                            CGF.MakeAddrLValue(ElemPtr, Private->getType()),
-                            Private->getType(), AggValueSlot::DoesNotOverlap);
-      break;
-    }
-    ++Idx;
-  }
-
-  CGF.FinishFunction();
-  return Fn;
-}
-
-/// This function emits a helper that reduces all the reduction variables from
-/// the team into the provided global buffer for the reduction variables.
-///
-/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data)
-///  void *GlobPtrs[];
-///  GlobPtrs[0] = (void*)&buffer.D0[Idx];
-///  ...
-///  GlobPtrs[N] = (void*)&buffer.DN[Idx];
-///  reduce_function(GlobPtrs, reduce_data);
-static llvm::Value *emitListToGlobalReduceFunction(
-    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
-    QualType ReductionArrayTy, SourceLocation Loc,
-    const RecordDecl *TeamReductionRec,
-    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-        &VarFieldMap,
-    llvm::Function *ReduceFn) {
-  ASTContext &C = CGM.getContext();
-
-  // Buffer: global reduction buffer.
-  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                              C.VoidPtrTy, ImplicitParamDecl::Other);
-  // Idx: index of the buffer.
-  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
-                           ImplicitParamDecl::Other);
-  // ReduceList: thread local Reduce list.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                  C.VoidPtrTy, ImplicitParamDecl::Other);
-  FunctionArgList Args;
-  Args.push_back(&BufferArg);
-  Args.push_back(&IdxArg);
-  Args.push_back(&ReduceListArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
-  auto *Fn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      "_omp_reduction_list_to_global_reduce_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setDoesNotRecurse();
-  CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
-  QualType StaticTy = C.getRecordType(TeamReductionRec);
-  llvm::Type *LLVMReductionsBufferTy =
-      CGM.getTypes().ConvertTypeForMem(StaticTy);
-  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
-      LLVMReductionsBufferTy->getPointerTo());
-
-  // 1. Build a list of reduction variables.
-  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
-  Address ReductionList =
-      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
-  auto IPriv = Privates.begin();
-  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
-                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
-                                              /*Volatile=*/false, C.IntTy,
-                                              Loc)};
-  unsigned Idx = 0;
-  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
-    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
-    // Global = Buffer.VD[Idx];
-    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
-    const FieldDecl *FD = VarFieldMap.lookup(VD);
-    LValue GlobLVal = CGF.EmitLValueForField(
-        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
-    llvm::Value *BufferPtr =
-        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
-    llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr);
-    CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy);
-    if ((*IPriv)->getType()->isVariablyModifiedType()) {
-      // Store array size.
-      ++Idx;
-      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
-      llvm::Value *Size = CGF.Builder.CreateIntCast(
-          CGF.getVLASize(
-                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
-              .NumElts,
-          CGF.SizeTy, /*isSigned=*/false);
-      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
-                              Elem);
-    }
-  }
-
-  // Call reduce_function(GlobalReduceList, ReduceList)
-  llvm::Value *GlobalReduceList =
-      CGF.EmitCastToVoidPtr(ReductionList.getPointer());
-  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
-  llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar(
-      AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
-  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
-      CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});
-  CGF.FinishFunction();
-  return Fn;
-}
-
-/// This function emits a helper that copies all the reduction variables from
-/// the team into the provided global buffer for the reduction variables.
-///
-/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
-///   For all data entries D in reduce_data:
-///     Copy buffer.D[Idx] to local D;
-static llvm::Value *emitGlobalToListCopyFunction(
-    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
-    QualType ReductionArrayTy, SourceLocation Loc,
-    const RecordDecl *TeamReductionRec,
-    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-        &VarFieldMap) {
-  ASTContext &C = CGM.getContext();
-
-  // Buffer: global reduction buffer.
-  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                              C.VoidPtrTy, ImplicitParamDecl::Other);
-  // Idx: index of the buffer.
-  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
-                           ImplicitParamDecl::Other);
-  // ReduceList: thread local Reduce list.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                  C.VoidPtrTy, ImplicitParamDecl::Other);
-  FunctionArgList Args;
-  Args.push_back(&BufferArg);
-  Args.push_back(&IdxArg);
-  Args.push_back(&ReduceListArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
-  auto *Fn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      "_omp_reduction_global_to_list_copy_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setDoesNotRecurse();
-  CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
-  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
-  Address LocalReduceList(
-      Bld.CreatePointerBitCastOrAddrSpaceCast(
-          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
-                               C.VoidPtrTy, Loc),
-          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
-      CGF.getPointerAlign());
-  QualType StaticTy = C.getRecordType(TeamReductionRec);
-  llvm::Type *LLVMReductionsBufferTy =
-      CGM.getTypes().ConvertTypeForMem(StaticTy);
-  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
-      LLVMReductionsBufferTy->getPointerTo());
-
-  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
-                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
-                                              /*Volatile=*/false, C.IntTy,
-                                              Loc)};
-  unsigned Idx = 0;
-  for (const Expr *Private : Privates) {
-    // Reduce element = LocalReduceList[i]
-    Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
-    llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
-        ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
-    // elemptr = ((CopyType*)(elemptrptr)) + I
-    ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo());
-    Address ElemPtr =
-        Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
-    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
-    // Global = Buffer.VD[Idx];
-    const FieldDecl *FD = VarFieldMap.lookup(VD);
-    LValue GlobLVal = CGF.EmitLValueForField(
-        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
-    llvm::Value *BufferPtr =
-        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
-    GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment()));
-    switch (CGF.getEvaluationKind(Private->getType())) {
-    case TEK_Scalar: {
-      llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc);
-      CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType());
-      break;
-    }
-    case TEK_Complex: {
-      CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(GlobLVal, Loc);
-      CGF.EmitStoreOfComplex(V, CGF.MakeAddrLValue(ElemPtr, Private->getType()),
-                             /*isInit=*/false);
-      break;
-    }
-    case TEK_Aggregate:
-      CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()),
-                            GlobLVal, Private->getType(),
-                            AggValueSlot::DoesNotOverlap);
-      break;
-    }
-    ++Idx;
-  }
-
-  CGF.FinishFunction();
-  return Fn;
-}
-
-/// This function emits a helper that reduces all the reduction variables from
-/// the team into the provided global buffer for the reduction variables.
-///
-/// void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data)
-///  void *GlobPtrs[];
-///  GlobPtrs[0] = (void*)&buffer.D0[Idx];
-///  ...
-///  GlobPtrs[N] = (void*)&buffer.DN[Idx];
-///  reduce_function(reduce_data, GlobPtrs);
-static llvm::Value *emitGlobalToListReduceFunction(
-    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
-    QualType ReductionArrayTy, SourceLocation Loc,
-    const RecordDecl *TeamReductionRec,
-    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-        &VarFieldMap,
-    llvm::Function *ReduceFn) {
-  ASTContext &C = CGM.getContext();
-
-  // Buffer: global reduction buffer.
-  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                              C.VoidPtrTy, ImplicitParamDecl::Other);
-  // Idx: index of the buffer.
-  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
-                           ImplicitParamDecl::Other);
-  // ReduceList: thread local Reduce list.
-  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
-                                  C.VoidPtrTy, ImplicitParamDecl::Other);
-  FunctionArgList Args;
-  Args.push_back(&BufferArg);
-  Args.push_back(&IdxArg);
-  Args.push_back(&ReduceListArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
-  auto *Fn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      "_omp_reduction_global_to_list_reduce_func", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setDoesNotRecurse();
-  CodeGenFunction CGF(CGM);
-  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
-  CGBuilderTy &Bld = CGF.Builder;
-
-  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
-  QualType StaticTy = C.getRecordType(TeamReductionRec);
-  llvm::Type *LLVMReductionsBufferTy =
-      CGM.getTypes().ConvertTypeForMem(StaticTy);
-  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
-      LLVMReductionsBufferTy->getPointerTo());
-
-  // 1. Build a list of reduction variables.
-  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
-  Address ReductionList =
-      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
-  auto IPriv = Privates.begin();
-  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
-                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
-                                              /*Volatile=*/false, C.IntTy,
-                                              Loc)};
-  unsigned Idx = 0;
-  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
-    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
-    // Global = Buffer.VD[Idx];
-    const ValueDecl *VD = cast(*IPriv)->getDecl();
-    const FieldDecl *FD = VarFieldMap.lookup(VD);
-    LValue GlobLVal = CGF.EmitLValueForField(
-        CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
-    llvm::Value *BufferPtr =
-        Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs);
-    llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr);
-    CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy);
-    if ((*IPriv)->getType()->isVariablyModifiedType()) {
-      // Store array size.
-      ++Idx;
-      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
-      llvm::Value *Size = CGF.Builder.CreateIntCast(
-          CGF.getVLASize(
-                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
-              .NumElts,
-          CGF.SizeTy, /*isSigned=*/false);
-      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
-                              Elem);
-    }
-  }
-
-  // Call reduce_function(ReduceList, GlobalReduceList)
-  llvm::Value *GlobalReduceList =
-      CGF.EmitCastToVoidPtr(ReductionList.getPointer());
-  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
-  llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar(
-      AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
-  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
-      CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList});
-  CGF.FinishFunction();
-  return Fn;
-}
-
-///
-/// Design of OpenMP reductions on the GPU
-///
-/// Consider a typical OpenMP program with one or more reduction
-/// clauses:
-///
-/// float foo;
-/// double bar;
-/// #pragma omp target teams distribute parallel for \
-///             reduction(+:foo) reduction(*:bar)
-/// for (int i = 0; i < N; i++) {
-///   foo += A[i]; bar *= B[i];
-/// }
-///
-/// where 'foo' and 'bar' are reduced across all OpenMP threads in
-/// all teams.  In our OpenMP implementation on the NVPTX device an
-/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads
-/// within a team are mapped to CUDA threads within a threadblock.
-/// Our goal is to efficiently aggregate values across all OpenMP
-/// threads such that:
-///
-///   - the compiler and runtime are logically concise, and
-///   - the reduction is performed efficiently in a hierarchical
-///     manner as follows: within OpenMP threads in the same warp,
-///     across warps in a threadblock, and finally across teams on
-///     the NVPTX device.
-///
-/// Introduction to Decoupling
-///
-/// We would like to decouple the compiler and the runtime so that the
-/// latter is ignorant of the reduction variables (number, data types)
-/// and the reduction operators.  This allows a simpler interface
-/// and implementation while still attaining good performance.
-///
-/// Pseudocode for the aforementioned OpenMP program generated by the
-/// compiler is as follows:
-///
-/// 1. Create private copies of reduction variables on each OpenMP
-///    thread: 'foo_private', 'bar_private'
-/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
-///    to it and writes the result in 'foo_private' and 'bar_private'
-///    respectively.
-/// 3. Call the OpenMP runtime on the GPU to reduce within a team
-///    and store the result on the team master:
-///
-///     __kmpc_nvptx_parallel_reduce_nowait_v2(...,
-///        reduceData, shuffleReduceFn, interWarpCpyFn)
-///
-///     where:
-///       struct ReduceData {
-///         double *foo;
-///         double *bar;
-///       } reduceData
-///       reduceData.foo = &foo_private
-///       reduceData.bar = &bar_private
-///
-///     'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
-///     auxiliary functions generated by the compiler that operate on
-///     variables of type 'ReduceData'.  They aid the runtime perform
-///     algorithmic steps in a data agnostic manner.
-///
-///     'shuffleReduceFn' is a pointer to a function that reduces data
-///     of type 'ReduceData' across two OpenMP threads (lanes) in the
-///     same warp.  It takes the following arguments as input:
-///
-///     a. variable of type 'ReduceData' on the calling lane,
-///     b. its lane_id,
-///     c. an offset relative to the current lane_id to generate a
-///        remote_lane_id.  The remote lane contains the second
-///        variable of type 'ReduceData' that is to be reduced.
-///     d. an algorithm version parameter determining which reduction
-///        algorithm to use.
-///
-///     'shuffleReduceFn' retrieves data from the remote lane using
-///     efficient GPU shuffle intrinsics and reduces, using the
-///     algorithm specified by the 4th parameter, the two operands
-///     element-wise.  The result is written to the first operand.
-///
-///     Different reduction algorithms are implemented in different
-///     runtime functions, all calling 'shuffleReduceFn' to perform
-///     the essential reduction step.  Therefore, based on the 4th
-///     parameter, this function behaves slightly differently to
-///     cooperate with the runtime to ensure correctness under
-///     different circumstances.
-///
-///     'InterWarpCpyFn' is a pointer to a function that transfers
-///     reduced variables across warps.  It tunnels, through CUDA
-///     shared memory, the thread-private data of type 'ReduceData'
-///     from lane 0 of each warp to a lane in the first warp.
-/// 4. Call the OpenMP runtime on the GPU to reduce across teams.
-///    The last team writes the global reduced value to memory.
-///
-///     ret = __kmpc_nvptx_teams_reduce_nowait(...,
-///             reduceData, shuffleReduceFn, interWarpCpyFn,
-///             scratchpadCopyFn, loadAndReduceFn)
-///
-///     'scratchpadCopyFn' is a helper that stores reduced
-///     data from the team master to a scratchpad array in
-///     global memory.
-///
-///     'loadAndReduceFn' is a helper that loads data from
-///     the scratchpad array and reduces it with the input
-///     operand.
-///
-///     These compiler generated functions hide address
-///     calculation and alignment information from the runtime.
-/// 5. if ret == 1:
-///     The team master of the last team stores the reduced
-///     result to the globals in memory.
-///     foo += reduceData.foo; bar *= reduceData.bar
-///
-///
-/// Warp Reduction Algorithms
-///
-/// On the warp level, we have three algorithms implemented in the
-/// OpenMP runtime depending on the number of active lanes:
-///
-/// Full Warp Reduction
-///
-/// The reduce algorithm within a warp where all lanes are active
-/// is implemented in the runtime as follows:
-///
-/// full_warp_reduce(void *reduce_data,
-///                  kmp_ShuffleReductFctPtr ShuffleReduceFn) {
-///   for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
-///     ShuffleReduceFn(reduce_data, 0, offset, 0);
-/// }
-///
-/// The algorithm completes in log(2, WARPSIZE) steps.
-///
-/// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
-/// not used therefore we save instructions by not retrieving lane_id
-/// from the corresponding special registers.  The 4th parameter, which
-/// represents the version of the algorithm being used, is set to 0 to
-/// signify full warp reduction.
-///
-/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
-///
-/// #reduce_elem refers to an element in the local lane's data structure
-/// #remote_elem is retrieved from a remote lane
-/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
-/// reduce_elem = reduce_elem REDUCE_OP remote_elem;
-///
-/// Contiguous Partial Warp Reduction
-///
-/// This reduce algorithm is used within a warp where only the first
-/// 'n' (n <= WARPSIZE) lanes are active.  It is typically used when the
-/// number of OpenMP threads in a parallel region is not a multiple of
-/// WARPSIZE.  The algorithm is implemented in the runtime as follows:
-///
-/// void
-/// contiguous_partial_reduce(void *reduce_data,
-///                           kmp_ShuffleReductFctPtr ShuffleReduceFn,
-///                           int size, int lane_id) {
-///   int curr_size;
-///   int offset;
-///   curr_size = size;
-///   mask = curr_size/2;
-///   while (offset>0) {
-///     ShuffleReduceFn(reduce_data, lane_id, offset, 1);
-///     curr_size = (curr_size+1)/2;
-///     offset = curr_size/2;
-///   }
-/// }
-///
-/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
-///
-/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
-/// if (lane_id < offset)
-///     reduce_elem = reduce_elem REDUCE_OP remote_elem
-/// else
-///     reduce_elem = remote_elem
-///
-/// This algorithm assumes that the data to be reduced are located in a
-/// contiguous subset of lanes starting from the first.  When there is
-/// an odd number of active lanes, the data in the last lane is not
-/// aggregated with any other lane's dat but is instead copied over.
-///
-/// Dispersed Partial Warp Reduction
-///
-/// This algorithm is used within a warp when any discontiguous subset of
-/// lanes are active.  It is used to implement the reduction operation
-/// across lanes in an OpenMP simd region or in a nested parallel region.
-///
-/// void
-/// dispersed_partial_reduce(void *reduce_data,
-///                          kmp_ShuffleReductFctPtr ShuffleReduceFn) {
-///   int size, remote_id;
-///   int logical_lane_id = number_of_active_lanes_before_me() * 2;
-///   do {
-///       remote_id = next_active_lane_id_right_after_me();
-///       # the above function returns 0 of no active lane
-///       # is present right after the current lane.
-///       size = number_of_active_lanes_in_this_warp();
-///       logical_lane_id /= 2;
-///       ShuffleReduceFn(reduce_data, logical_lane_id,
-///                       remote_id-1-threadIdx.x, 2);
-///   } while (logical_lane_id % 2 == 0 && size > 1);
-/// }
-///
-/// There is no assumption made about the initial state of the reduction.
-/// Any number of lanes (>=1) could be active at any position.  The reduction
-/// result is returned in the first active lane.
-///
-/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
-///
-/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
-/// if (lane_id % 2 == 0 && offset > 0)
-///     reduce_elem = reduce_elem REDUCE_OP remote_elem
-/// else
-///     reduce_elem = remote_elem
-///
-///
-/// Intra-Team Reduction
-///
-/// This function, as implemented in the runtime call
-/// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP
-/// threads in a team.  It first reduces within a warp using the
-/// aforementioned algorithms.  We then proceed to gather all such
-/// reduced values at the first warp.
-///
-/// The runtime makes use of the function 'InterWarpCpyFn', which copies
-/// data from each of the "warp master" (zeroth lane of each warp, where
-/// warp-reduced data is held) to the zeroth warp.  This step reduces (in
-/// a mathematical sense) the problem of reduction across warp masters in
-/// a block to the problem of warp reduction.
-///
-///
-/// Inter-Team Reduction
-///
-/// Once a team has reduced its data to a single value, it is stored in
-/// a global scratchpad array.  Since each team has a distinct slot, this
-/// can be done without locking.
-///
-/// The last team to write to the scratchpad array proceeds to reduce the
-/// scratchpad array.  One or more workers in the last team use the helper
-/// 'loadAndReduceDataFn' to load and reduce values from the array, i.e.,
-/// the k'th worker reduces every k'th element.
-///
-/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to
-/// reduce across workers and compute a globally reduced value.
-///
-void CGOpenMPRuntimeNVPTX::emitReduction(
-    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
-    ArrayRef LHSExprs, ArrayRef RHSExprs,
-    ArrayRef ReductionOps, ReductionOptionsTy Options) {
-  if (!CGF.HaveInsertPoint())
-    return;
-
-  bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
-#ifndef NDEBUG
-  bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
-#endif
-
-  if (Options.SimpleReduction) {
-    assert(!TeamsReduction && !ParallelReduction &&
-           "Invalid reduction selection in emitReduction.");
-    CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
-                                   ReductionOps, Options);
-    return;
-  }
-
-  assert((TeamsReduction || ParallelReduction) &&
-         "Invalid reduction selection in emitReduction.");
-
-  // Build res = __kmpc_reduce{_nowait}(, , sizeof(RedList),
-  // RedList, shuffle_reduce_func, interwarp_copy_func);
-  // or
-  // Build res = __kmpc_reduce_teams_nowait_simple(, , );
-  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
-  llvm::Value *ThreadId = getThreadID(CGF, Loc);
-
-  llvm::Value *Res;
-  ASTContext &C = CGM.getContext();
-  // 1. Build a list of reduction variables.
-  // void *RedList[] = {[0], ..., [-1]};
-  auto Size = RHSExprs.size();
-  for (const Expr *E : Privates) {
-    if (E->getType()->isVariablyModifiedType())
-      // Reserve place for array size.
-      ++Size;
-  }
-  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
-  QualType ReductionArrayTy =
-      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
-                             /*IndexTypeQuals=*/0);
-  Address ReductionList =
-      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
-  auto IPriv = Privates.begin();
-  unsigned Idx = 0;
-  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
-    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
-    CGF.Builder.CreateStore(
-        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
-        Elem);
-    if ((*IPriv)->getType()->isVariablyModifiedType()) {
-      // Store array size.
-      ++Idx;
-      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
-      llvm::Value *Size = CGF.Builder.CreateIntCast(
-          CGF.getVLASize(
-                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
-              .NumElts,
-          CGF.SizeTy, /*isSigned=*/false);
-      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
-                              Elem);
-    }
-  }
-
-  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      ReductionList.getPointer(), CGF.VoidPtrTy);
-  llvm::Function *ReductionFn = emitReductionFunction(
-      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
-      LHSExprs, RHSExprs, ReductionOps);
-  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
-  llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
-      CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
-  llvm::Value *InterWarpCopyFn =
-      emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
-
-  if (ParallelReduction) {
-    llvm::Value *Args[] = {RTLoc,
-                           ThreadId,
-                           CGF.Builder.getInt32(RHSExprs.size()),
-                           ReductionArrayTySize,
-                           RL,
-                           ShuffleAndReduceFn,
-                           InterWarpCopyFn};
-
-    Res = CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(
-            OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
-        Args);
-  } else {
-    assert(TeamsReduction && "expected teams reduction.");
-    llvm::SmallDenseMap VarFieldMap;
-    llvm::SmallVector PrivatesReductions(Privates.size());
-    int Cnt = 0;
-    for (const Expr *DRE : Privates) {
-      PrivatesReductions[Cnt] = cast(DRE)->getDecl();
-      ++Cnt;
-    }
-    const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
-        CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap,
-        C.getLangOpts().OpenMPCUDAReductionBufNum);
-    TeamsReductions.push_back(TeamReductionRec);
-    if (!KernelTeamsReductionPtr) {
-      KernelTeamsReductionPtr = new llvm::GlobalVariable(
-          CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
-          llvm::GlobalValue::InternalLinkage, nullptr,
-          "_openmp_teams_reductions_buffer_$_$ptr");
-    }
-    llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar(
-        Address(KernelTeamsReductionPtr, CGM.getPointerAlign()),
-        /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
-    llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction(
-        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
-    llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction(
-        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
-        ReductionFn);
-    llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction(
-        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
-    llvm::Value *BufferToGlobalRedFn = ::emitGlobalToListReduceFunction(
-        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
-        ReductionFn);
-
-    llvm::Value *Args[] = {
-        RTLoc,
-        ThreadId,
-        GlobalBufferPtr,
-        CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum),
-        RL,
-        ShuffleAndReduceFn,
-        InterWarpCopyFn,
-        GlobalToBufferCpyFn,
-        GlobalToBufferRedFn,
-        BufferToGlobalCpyFn,
-        BufferToGlobalRedFn};
-
-    Res = CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(
-            OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
-        Args);
-  }
-
-  // 5. Build if (res == 1)
-  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done");
-  llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then");
-  llvm::Value *Cond = CGF.Builder.CreateICmpEQ(
-      Res, llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1));
-  CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB);
-
-  // 6. Build then branch: where we have reduced values in the master
-  //    thread in each team.
-  //    __kmpc_end_reduce{_nowait}();
-  //    break;
-  CGF.EmitBlock(ThenBB);
-
-  // Add emission of __kmpc_end_reduce{_nowait}();
-  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
-                    this](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    auto IPriv = Privates.begin();
-    auto ILHS = LHSExprs.begin();
-    auto IRHS = RHSExprs.begin();
-    for (const Expr *E : ReductionOps) {
-      emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS),
-                                  cast(*IRHS));
-      ++IPriv;
-      ++ILHS;
-      ++IRHS;
-    }
-  };
-  llvm::Value *EndArgs[] = {ThreadId};
-  RegionCodeGenTy RCG(CodeGen);
-  NVPTXActionTy Action(
-      nullptr, llvm::None,
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
-      EndArgs);
-  RCG.setAction(Action);
-  RCG(CGF);
-  // There is no need to emit line number for unconditional branch.
-  (void)ApplyDebugLocation::CreateEmpty(CGF);
-  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
-}
-
-const VarDecl *
-CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD,
-                                         const VarDecl *NativeParam) const {
-  if (!NativeParam->getType()->isReferenceType())
-    return NativeParam;
-  QualType ArgType = NativeParam->getType();
-  QualifierCollector QC;
-  const Type *NonQualTy = QC.strip(ArgType);
-  QualType PointeeTy = cast(NonQualTy)->getPointeeType();
-  if (const auto *Attr = FD->getAttr()) {
-    if (Attr->getCaptureKind() == OMPC_map) {
-      PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,
-                                                        LangAS::opencl_global);
-    } else if (Attr->getCaptureKind() == OMPC_firstprivate &&
-               PointeeTy.isConstant(CGM.getContext())) {
-      PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,
-                                                        LangAS::opencl_generic);
-    }
-  }
-  ArgType = CGM.getContext().getPointerType(PointeeTy);
-  QC.addRestrict();
-  enum { NVPTX_local_addr = 5 };
-  QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr));
-  ArgType = QC.apply(CGM.getContext(), ArgType);
-  if (isa(NativeParam))
-    return ImplicitParamDecl::Create(
-        CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
-        NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
-  return ParmVarDecl::Create(
-      CGM.getContext(),
-      const_cast(NativeParam->getDeclContext()),
-      NativeParam->getBeginLoc(), NativeParam->getLocation(),
-      NativeParam->getIdentifier(), ArgType,
-      /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
-}
-
-Address
-CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF,
-                                          const VarDecl *NativeParam,
-                                          const VarDecl *TargetParam) const {
-  assert(NativeParam != TargetParam &&
-         NativeParam->getType()->isReferenceType() &&
-         "Native arg must not be the same as target arg.");
-  Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);
-  QualType NativeParamType = NativeParam->getType();
-  QualifierCollector QC;
-  const Type *NonQualTy = QC.strip(NativeParamType);
-  QualType NativePointeeTy = cast(NonQualTy)->getPointeeType();
-  unsigned NativePointeeAddrSpace =
-      CGF.getContext().getTargetAddressSpace(NativePointeeTy);
-  QualType TargetTy = TargetParam->getType();
-  llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
-      LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
-  // First cast to generic.
-  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
-                      /*AddrSpace=*/0));
-  // Cast from generic to native address space.
-  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
-                      NativePointeeAddrSpace));
-  Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
-  CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
-                        NativeParamType);
-  return NativeParamAddr;
-}
-
-void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
-    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
-    ArrayRef Args) const {
-  SmallVector TargetArgs;
-  TargetArgs.reserve(Args.size());
-  auto *FnType = OutlinedFn.getFunctionType();
-  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
-    if (FnType->isVarArg() && FnType->getNumParams() <= I) {
-      TargetArgs.append(std::next(Args.begin(), I), Args.end());
-      break;
-    }
-    llvm::Type *TargetType = FnType->getParamType(I);
-    llvm::Value *NativeArg = Args[I];
-    if (!TargetType->isPointerTy()) {
-      TargetArgs.emplace_back(NativeArg);
-      continue;
-    }
-    llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-        NativeArg,
-        NativeArg->getType()->getPointerElementType()->getPointerTo());
-    TargetArgs.emplace_back(
-        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
-  }
-  CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
-}
-
-/// Emit function which wraps the outline parallel region
-/// and controls the arguments which are passed to this function.
-/// The wrapper ensures that the outlined function is called
-/// with the correct arguments when data is shared.
-llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
-    llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {
-  ASTContext &Ctx = CGM.getContext();
-  const auto &CS = *D.getCapturedStmt(OMPD_parallel);
-
-  // Create a function that takes as argument the source thread.
-  FunctionArgList WrapperArgs;
-  QualType Int16QTy =
-      Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);
-  QualType Int32QTy =
-      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
-  ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
-                                     /*Id=*/nullptr, Int16QTy,
-                                     ImplicitParamDecl::Other);
-  ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
-                               /*Id=*/nullptr, Int32QTy,
-                               ImplicitParamDecl::Other);
-  WrapperArgs.emplace_back(&ParallelLevelArg);
-  WrapperArgs.emplace_back(&WrapperArg);
-
-  const CGFunctionInfo &CGFI =
-      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
-
-  auto *Fn = llvm::Function::Create(
-      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
-      Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
-  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
-  Fn->setDoesNotRecurse();
-
-  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
-  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
-                    D.getBeginLoc(), D.getBeginLoc());
-
-  const auto *RD = CS.getCapturedRecordDecl();
-  auto CurField = RD->field_begin();
-
-  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                                      /*Name=*/".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  // Get the array of arguments.
-  SmallVector Args;
-
-  Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
-  Args.emplace_back(ZeroAddr.getPointer());
-
-  CGBuilderTy &Bld = CGF.Builder;
-  auto CI = CS.capture_begin();
-
-  // Use global memory for data sharing.
-  // Handle passing of global args to workers.
-  Address GlobalArgs =
-      CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
-  llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
-  llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
-      DataSharingArgs);
-
-  // Retrieve the shared variables from the list of references returned
-  // by the runtime. Pass the variables to the outlined function.
-  Address SharedArgListAddress = Address::invalid();
-  if (CS.capture_size() > 0 ||
-      isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
-    SharedArgListAddress = CGF.EmitLoadOfPointer(
-        GlobalArgs, CGF.getContext()
-                        .getPointerType(CGF.getContext().getPointerType(
-                            CGF.getContext().VoidPtrTy))
-                        .castAs());
-  }
-  unsigned Idx = 0;
-  if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
-    Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
-    Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        Src, CGF.SizeTy->getPointerTo());
-    llvm::Value *LB = CGF.EmitLoadOfScalar(
-        TypedAddress,
-        /*Volatile=*/false,
-        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
-        cast(D).getLowerBoundVariable()->getExprLoc());
-    Args.emplace_back(LB);
-    ++Idx;
-    Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
-    TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        Src, CGF.SizeTy->getPointerTo());
-    llvm::Value *UB = CGF.EmitLoadOfScalar(
-        TypedAddress,
-        /*Volatile=*/false,
-        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
-        cast(D).getUpperBoundVariable()->getExprLoc());
-    Args.emplace_back(UB);
-    ++Idx;
-  }
-  if (CS.capture_size() > 0) {
-    ASTContext &CGFContext = CGF.getContext();
-    for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
-      QualType ElemTy = CurField->getType();
-      Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx);
-      Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
-          Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)));
-      llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
-                                              /*Volatile=*/false,
-                                              CGFContext.getPointerType(ElemTy),
-                                              CI->getLocation());
-      if (CI->capturesVariableByCopy() &&
-          !CI->getCapturedVar()->getType()->isAnyPointerType()) {
-        Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
-                              CI->getLocation());
-      }
-      Args.emplace_back(Arg);
-    }
-  }
-
-  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
-  CGF.FinishFunction();
-  return Fn;
-}
-
-void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
-                                              const Decl *D) {
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
-    return;
-
-  assert(D && "Expected function or captured|block decl.");
-  assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
-         "Function is registered already.");
-  assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
-         "Team is set but not processed.");
-  const Stmt *Body = nullptr;
-  bool NeedToDelayGlobalization = false;
-  if (const auto *FD = dyn_cast(D)) {
-    Body = FD->getBody();
-  } else if (const auto *BD = dyn_cast(D)) {
-    Body = BD->getBody();
-  } else if (const auto *CD = dyn_cast(D)) {
-    Body = CD->getBody();
-    NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
-    if (NeedToDelayGlobalization &&
-        getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-      return;
-  }
-  if (!Body)
-    return;
-  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
-  VarChecker.Visit(Body);
-  const RecordDecl *GlobalizedVarsRecord =
-      VarChecker.getGlobalizedRecord(IsInTTDRegion);
-  TeamAndReductions.first = nullptr;
-  TeamAndReductions.second.clear();
-  ArrayRef EscapedVariableLengthDecls =
-      VarChecker.getEscapedVariableLengthDecls();
-  if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
-    return;
-  auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
-  I->getSecond().MappedParams =
-      std::make_unique();
-  I->getSecond().GlobalRecord = GlobalizedVarsRecord;
-  I->getSecond().EscapedParameters.insert(
-      VarChecker.getEscapedParameters().begin(),
-      VarChecker.getEscapedParameters().end());
-  I->getSecond().EscapedVariableLengthDecls.append(
-      EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
-  DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
-  for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
-    assert(VD->isCanonicalDecl() && "Expected canonical declaration");
-    const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
-    Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
-  }
-  if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
-    CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
-    VarChecker.Visit(Body);
-    I->getSecond().SecondaryGlobalRecord =
-        VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true);
-    I->getSecond().SecondaryLocalVarData.emplace();
-    DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
-    for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
-      assert(VD->isCanonicalDecl() && "Expected canonical declaration");
-      const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
-      Data.insert(
-          std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true)));
-    }
-  }
-  if (!NeedToDelayGlobalization) {
-    emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
-    struct GlobalizationScope final : EHScopeStack::Cleanup {
-      GlobalizationScope() = default;
-
-      void Emit(CodeGenFunction &CGF, Flags flags) override {
-        static_cast(CGF.CGM.getOpenMPRuntime())
-            .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true);
-      }
-    };
-    CGF.EHStack.pushCleanup(NormalAndEHCleanup);
-  }
-}
-
-Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
-                                                        const VarDecl *VD) {
-  if (VD && VD->hasAttr()) {
-    const auto *A = VD->getAttr();
-    switch (A->getAllocatorType()) {
-      // Use the default allocator here as by default local vars are
-      // threadlocal.
-    case OMPAllocateDeclAttr::OMPNullMemAlloc:
-    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
-    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
-    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
-    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
-      // Follow the user decision - use default allocation.
-      return Address::invalid();
-    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
-      // TODO: implement aupport for user-defined allocators.
-      return Address::invalid();
-    case OMPAllocateDeclAttr::OMPConstMemAlloc: {
-      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
-      auto *GV = new llvm::GlobalVariable(
-          CGM.getModule(), VarTy, /*isConstant=*/false,
-          llvm::GlobalValue::InternalLinkage,
-          llvm::Constant::getNullValue(VarTy), VD->getName(),
-          /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
-          CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant));
-      CharUnits Align = CGM.getContext().getDeclAlign(VD);
-      GV->setAlignment(Align.getAsAlign());
-      return Address(GV, Align);
-    }
-    case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
-      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
-      auto *GV = new llvm::GlobalVariable(
-          CGM.getModule(), VarTy, /*isConstant=*/false,
-          llvm::GlobalValue::InternalLinkage,
-          llvm::Constant::getNullValue(VarTy), VD->getName(),
-          /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
-          CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
-      CharUnits Align = CGM.getContext().getDeclAlign(VD);
-      GV->setAlignment(Align.getAsAlign());
-      return Address(GV, Align);
-    }
-    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
-    case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
-      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
-      auto *GV = new llvm::GlobalVariable(
-          CGM.getModule(), VarTy, /*isConstant=*/false,
-          llvm::GlobalValue::InternalLinkage,
-          llvm::Constant::getNullValue(VarTy), VD->getName());
-      CharUnits Align = CGM.getContext().getDeclAlign(VD);
-      GV->setAlignment(Align.getAsAlign());
-      return Address(GV, Align);
-    }
-    }
-  }
-
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
-    return Address::invalid();
-
-  VD = VD->getCanonicalDecl();
-  auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
-  if (I == FunctionGlobalizedDecls.end())
-    return Address::invalid();
-  auto VDI = I->getSecond().LocalVarData.find(VD);
-  if (VDI != I->getSecond().LocalVarData.end())
-    return VDI->second.PrivateAddr;
-  if (VD->hasAttrs()) {
-    for (specific_attr_iterator IT(VD->attr_begin()),
-         E(VD->attr_end());
-         IT != E; ++IT) {
-      auto VDI = I->getSecond().LocalVarData.find(
-          cast(cast(IT->getRef())->getDecl())
-              ->getCanonicalDecl());
-      if (VDI != I->getSecond().LocalVarData.end())
-        return VDI->second.PrivateAddr;
-    }
-  }
-
-  return Address::invalid();
-}
-
-void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
-  FunctionGlobalizedDecls.erase(CGF.CurFn);
-  CGOpenMPRuntime::functionFinished(CGF);
-}
-
-void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
-    CodeGenFunction &CGF, const OMPLoopDirective &S,
-    OpenMPDistScheduleClauseKind &ScheduleKind,
-    llvm::Value *&Chunk) const {
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
-    ScheduleKind = OMPC_DIST_SCHEDULE_static;
-    Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
-        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
-        S.getIterationVariable()->getType(), S.getBeginLoc());
-    return;
-  }
-  CGOpenMPRuntime::getDefaultDistScheduleAndChunk(
-      CGF, S, ScheduleKind, Chunk);
-}
-
-void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
-    CodeGenFunction &CGF, const OMPLoopDirective &S,
-    OpenMPScheduleClauseKind &ScheduleKind,
-    const Expr *&ChunkExpr) const {
-  ScheduleKind = OMPC_SCHEDULE_static;
-  // Chunk size is 1 in this case.
-  llvm::APInt ChunkSize(32, 1);
-  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
-      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
-      SourceLocation());
-}
-
-void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
-    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
-  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
-         " Expected target-based directive.");
-  const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);
-  for (const CapturedStmt::Capture &C : CS->captures()) {
-    // Capture variables captured by reference in lambdas for target-based
-    // directives.
-    if (!C.capturesVariable())
-      continue;
-    const VarDecl *VD = C.getCapturedVar();
-    const auto *RD = VD->getType()
-                         .getCanonicalType()
-                         .getNonReferenceType()
-                         ->getAsCXXRecordDecl();
-    if (!RD || !RD->isLambda())
-      continue;
-    Address VDAddr = CGF.GetAddrOfLocalVar(VD);
-    LValue VDLVal;
-    if (VD->getType().getCanonicalType()->isReferenceType())
-      VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
-    else
-      VDLVal = CGF.MakeAddrLValue(
-          VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
-    llvm::DenseMap Captures;
-    FieldDecl *ThisCapture = nullptr;
-    RD->getCaptureFields(Captures, ThisCapture);
-    if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
-      LValue ThisLVal =
-          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
-      llvm::Value *CXXThis = CGF.LoadCXXThis();
-      CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
-    }
-    for (const LambdaCapture &LC : RD->captures()) {
-      if (LC.getCaptureKind() != LCK_ByRef)
-        continue;
-      const VarDecl *VD = LC.getCapturedVar();
-      if (!CS->capturesVariable(VD))
-        continue;
-      auto It = Captures.find(VD);
-      assert(It != Captures.end() && "Found lambda capture without field.");
-      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
-      Address VDAddr = CGF.GetAddrOfLocalVar(VD);
-      if (VD->getType().getCanonicalType()->isReferenceType())
-        VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
-                                               VD->getType().getCanonicalType())
-                     .getAddress(CGF);
-      CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal);
-    }
-  }
-}
-
-unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const {
-  return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
-}
-
-bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
-                                                            LangAS &AS) {
-  if (!VD || !VD->hasAttr())
-    return false;
-  const auto *A = VD->getAttr();
-  switch(A->getAllocatorType()) {
-  case OMPAllocateDeclAttr::OMPNullMemAlloc:
-  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
-  // Not supported, fallback to the default mem space.
-  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
-  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
-  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
-  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
-  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
-    AS = LangAS::Default;
-    return true;
-  case OMPAllocateDeclAttr::OMPConstMemAlloc:
-    AS = LangAS::cuda_constant;
-    return true;
-  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
-    AS = LangAS::cuda_shared;
-    return true;
-  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
-    llvm_unreachable("Expected predefined allocator for the variables with the "
-                     "static storage.");
-  }
-  return false;
-}
-
-// Get current CudaArch and ignore any unknown values
-static CudaArch getCudaArch(CodeGenModule &CGM) {
-  if (!CGM.getTarget().hasFeature("ptx"))
-    return CudaArch::UNKNOWN;
-  llvm::StringMap Features;
-  CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
-                                 CGM.getTarget().getTargetOpts().CPU,
-                                 CGM.getTarget().getTargetOpts().Features);
-  for (const auto &Feature : Features) {
-    if (Feature.getValue()) {
-      CudaArch Arch = StringToCudaArch(Feature.getKey());
-      if (Arch != CudaArch::UNKNOWN)
-        return Arch;
-    }
-  }
-  return CudaArch::UNKNOWN;
-}
-
-/// Check to see if target architecture supports unified addressing which is
-/// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeNVPTX::processRequiresDirective(
-    const OMPRequiresDecl *D) {
-  for (const OMPClause *Clause : D->clauselists()) {
-    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
-      CudaArch Arch = getCudaArch(CGM);
-      switch (Arch) {
-      case CudaArch::SM_20:
-      case CudaArch::SM_21:
-      case CudaArch::SM_30:
-      case CudaArch::SM_32:
-      case CudaArch::SM_35:
-      case CudaArch::SM_37:
-      case CudaArch::SM_50:
-      case CudaArch::SM_52:
-      case CudaArch::SM_53:
-      case CudaArch::SM_60:
-      case CudaArch::SM_61:
-      case CudaArch::SM_62: {
-        SmallString<256> Buffer;
-        llvm::raw_svector_ostream Out(Buffer);
-        Out << "Target architecture " << CudaArchToString(Arch)
-            << " does not support unified addressing";
-        CGM.Error(Clause->getBeginLoc(), Out.str());
-        return;
-      }
-      case CudaArch::SM_70:
-      case CudaArch::SM_72:
-      case CudaArch::SM_75:
-      case CudaArch::SM_80:
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX810:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX908:
-      case CudaArch::GFX909:
-      case CudaArch::GFX1010:
-      case CudaArch::GFX1011:
-      case CudaArch::GFX1012:
-      case CudaArch::GFX1030:
-      case CudaArch::UNKNOWN:
-        break;
-      case CudaArch::LAST:
-        llvm_unreachable("Unexpected Cuda arch.");
-      }
-    }
-  }
-  CGOpenMPRuntime::processRequiresDirective(D);
-}
-
-/// Get number of SMs and number of blocks per SM.
-static std::pair getSMsBlocksPerSM(CodeGenModule &CGM) {
-  std::pair Data;
-  if (CGM.getLangOpts().OpenMPCUDANumSMs)
-    Data.first = CGM.getLangOpts().OpenMPCUDANumSMs;
-  if (CGM.getLangOpts().OpenMPCUDABlocksPerSM)
-    Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
-  if (Data.first && Data.second)
-    return Data;
-  switch (getCudaArch(CGM)) {
-  case CudaArch::SM_20:
-  case CudaArch::SM_21:
-  case CudaArch::SM_30:
-  case CudaArch::SM_32:
-  case CudaArch::SM_35:
-  case CudaArch::SM_37:
-  case CudaArch::SM_50:
-  case CudaArch::SM_52:
-  case CudaArch::SM_53:
-    return {16, 16};
-  case CudaArch::SM_60:
-  case CudaArch::SM_61:
-  case CudaArch::SM_62:
-    return {56, 32};
-  case CudaArch::SM_70:
-  case CudaArch::SM_72:
-  case CudaArch::SM_75:
-  case CudaArch::SM_80:
-    return {84, 32};
-  case CudaArch::GFX600:
-  case CudaArch::GFX601:
-  case CudaArch::GFX700:
-  case CudaArch::GFX701:
-  case CudaArch::GFX702:
-  case CudaArch::GFX703:
-  case CudaArch::GFX704:
-  case CudaArch::GFX801:
-  case CudaArch::GFX802:
-  case CudaArch::GFX803:
-  case CudaArch::GFX810:
-  case CudaArch::GFX900:
-  case CudaArch::GFX902:
-  case CudaArch::GFX904:
-  case CudaArch::GFX906:
-  case CudaArch::GFX908:
-  case CudaArch::GFX909:
-  case CudaArch::GFX1010:
-  case CudaArch::GFX1011:
-  case CudaArch::GFX1012:
-  case CudaArch::GFX1030:
-  case CudaArch::UNKNOWN:
-    break;
-  case CudaArch::LAST:
-    llvm_unreachable("Unexpected Cuda arch.");
-  }
-  llvm_unreachable("Unexpected NVPTX target without ptx feature.");
-}
-
-void CGOpenMPRuntimeNVPTX::clear() {
-  if (!GlobalizedRecords.empty() &&
-      !CGM.getLangOpts().OpenMPCUDATargetParallel) {
-    ASTContext &C = CGM.getContext();
-    llvm::SmallVector GlobalRecs;
-    llvm::SmallVector SharedRecs;
-    RecordDecl *StaticRD = C.buildImplicitRecord(
-        "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
-    StaticRD->startDefinition();
-    RecordDecl *SharedStaticRD = C.buildImplicitRecord(
-        "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
-    SharedStaticRD->startDefinition();
-    for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
-      if (Records.Records.empty())
-        continue;
-      unsigned Size = 0;
-      unsigned RecAlignment = 0;
-      for (const RecordDecl *RD : Records.Records) {
-        QualType RDTy = C.getRecordType(RD);
-        unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity();
-        RecAlignment = std::max(RecAlignment, Alignment);
-        unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity();
-        Size =
-            llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
-      }
-      Size = llvm::alignTo(Size, RecAlignment);
-      llvm::APInt ArySize(/*numBits=*/64, Size);
-      QualType SubTy = C.getConstantArrayType(
-          C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
-      const bool UseSharedMemory = Size <= SharedMemorySize;
-      auto *Field =
-          FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
-                            SourceLocation(), SourceLocation(), nullptr, SubTy,
-                            C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
-                            /*BW=*/nullptr, /*Mutable=*/false,
-                            /*InitStyle=*/ICIS_NoInit);
-      Field->setAccess(AS_public);
-      if (UseSharedMemory) {
-        SharedStaticRD->addDecl(Field);
-        SharedRecs.push_back(&Records);
-      } else {
-        StaticRD->addDecl(Field);
-        GlobalRecs.push_back(&Records);
-      }
-      Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size));
-      Records.UseSharedMemory->setInitializer(
-          llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
-    }
-    // Allocate SharedMemorySize buffer for the shared memory.
-    // FIXME: nvlink does not handle weak linkage correctly (object with the
-    // different size are reported as erroneous).
-    // Restore this code as sson as nvlink is fixed.
-    if (!SharedStaticRD->field_empty()) {
-      llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize);
-      QualType SubTy = C.getConstantArrayType(
-          C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
-      auto *Field = FieldDecl::Create(
-          C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy,
-          C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
-      Field->setAccess(AS_public);
-      SharedStaticRD->addDecl(Field);
-    }
-    SharedStaticRD->completeDefinition();
-    if (!SharedStaticRD->field_empty()) {
-      QualType StaticTy = C.getRecordType(SharedStaticRD);
-      llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
-      auto *GV = new llvm::GlobalVariable(
-          CGM.getModule(), LLVMStaticTy,
-          /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
-          llvm::Constant::getNullValue(LLVMStaticTy),
-          "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
-          llvm::GlobalValue::NotThreadLocal,
-          C.getTargetAddressSpace(LangAS::cuda_shared));
-      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-          GV, CGM.VoidPtrTy);
-      for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
-        Rec->Buffer->replaceAllUsesWith(Replacement);
-        Rec->Buffer->eraseFromParent();
-      }
-    }
-    StaticRD->completeDefinition();
-    if (!StaticRD->field_empty()) {
-      QualType StaticTy = C.getRecordType(StaticRD);
-      std::pair SMsBlockPerSM = getSMsBlocksPerSM(CGM);
-      llvm::APInt Size1(32, SMsBlockPerSM.second);
-      QualType Arr1Ty =
-          C.getConstantArrayType(StaticTy, Size1, nullptr, ArrayType::Normal,
-                                 /*IndexTypeQuals=*/0);
-      llvm::APInt Size2(32, SMsBlockPerSM.first);
-      QualType Arr2Ty =
-          C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal,
-                                 /*IndexTypeQuals=*/0);
-      llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
-      // FIXME: nvlink does not handle weak linkage correctly (object with the
-      // different size are reported as erroneous).
-      // Restore CommonLinkage as soon as nvlink is fixed.
-      auto *GV = new llvm::GlobalVariable(
-          CGM.getModule(), LLVMArr2Ty,
-          /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
-          llvm::Constant::getNullValue(LLVMArr2Ty),
-          "_openmp_static_glob_rd_$_");
-      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-          GV, CGM.VoidPtrTy);
-      for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
-        Rec->Buffer->replaceAllUsesWith(Replacement);
-        Rec->Buffer->eraseFromParent();
-      }
-    }
-  }
-  if (!TeamsReductions.empty()) {
-    ASTContext &C = CGM.getContext();
-    RecordDecl *StaticRD = C.buildImplicitRecord(
-        "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union);
-    StaticRD->startDefinition();
-    for (const RecordDecl *TeamReductionRec : TeamsReductions) {
-      QualType RecTy = C.getRecordType(TeamReductionRec);
-      auto *Field = FieldDecl::Create(
-          C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy,
-          C.getTrivialTypeSourceInfo(RecTy, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
-      Field->setAccess(AS_public);
-      StaticRD->addDecl(Field);
-    }
-    StaticRD->completeDefinition();
-    QualType StaticTy = C.getRecordType(StaticRD);
-    llvm::Type *LLVMReductionsBufferTy =
-        CGM.getTypes().ConvertTypeForMem(StaticTy);
-    // FIXME: nvlink does not handle weak linkage correctly (object with the
-    // different size are reported as erroneous).
-    // Restore CommonLinkage as soon as nvlink is fixed.
-    auto *GV = new llvm::GlobalVariable(
-        CGM.getModule(), LLVMReductionsBufferTy,
-        /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
-        llvm::Constant::getNullValue(LLVMReductionsBufferTy),
-        "_openmp_teams_reductions_buffer_$_");
-    KernelTeamsReductionPtr->setInitializer(
-        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
-                                                             CGM.VoidPtrTy));
-  }
-  CGOpenMPRuntime::clear();
-}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index c52ae43817c75..40679f9143b96 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets.
+// targets from generalized CGOpenMPRuntimeGPU class.
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,473 +15,18 @@
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
 
 #include "CGOpenMPRuntime.h"
+#include "CGOpenMPRuntimeGPU.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/StmtOpenMP.h"
 
 namespace clang {
 namespace CodeGen {
 
-class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
-public:
-  /// Defines the execution mode.
-  enum ExecutionMode {
-    /// SPMD execution mode (all threads are worker threads).
-    EM_SPMD,
-    /// Non-SPMD execution mode (1 master thread, others are workers).
-    EM_NonSPMD,
-    /// Unknown execution mode (orphaned directive).
-    EM_Unknown,
-  };
-private:
-  /// Parallel outlined function work for workers to execute.
-  llvm::SmallVector Work;
-
-  struct EntryFunctionState {
-    llvm::BasicBlock *ExitBB = nullptr;
-  };
-
-  class WorkerFunctionState {
-  public:
-    llvm::Function *WorkerFn;
-    const CGFunctionInfo &CGFI;
-    SourceLocation Loc;
-
-    WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
-
-  private:
-    void createWorkerFunction(CodeGenModule &CGM);
-  };
-
-  ExecutionMode getExecutionMode() const;
-
-  bool requiresFullRuntime() const { return RequiresFullRuntime; }
-
-  /// Get barrier to synchronize all threads in a block.
-  void syncCTAThreads(CodeGenFunction &CGF);
-
-  /// Emit the worker function for the current target region.
-  void emitWorkerFunction(WorkerFunctionState &WST);
-
-  /// Helper for worker function. Emit body of worker loop.
-  void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
-
-  /// Helper for non-SPMD target entry function. Guide the master and
-  /// worker threads to their respective locations.
-  void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
-                              WorkerFunctionState &WST);
-
-  /// Signal termination of OMP execution for non-SPMD target entry
-  /// function.
-  void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
-
-  /// Helper for generic variables globalization prolog.
-  void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
-                             bool WithSPMDCheck = false);
-
-  /// Helper for generic variables globalization epilog.
-  void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
-
-  /// Helper for SPMD mode target directive's entry function.
-  void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
-                           const OMPExecutableDirective &D);
-
-  /// Signal termination of SPMD mode execution.
-  void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
-
-  //
-  // Base class overrides.
-  //
-
-  /// Creates offloading entry for the provided entry ID \a ID,
-  /// address \a Addr, size \a Size, and flags \a Flags.
-  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
-                          uint64_t Size, int32_t Flags,
-                          llvm::GlobalValue::LinkageTypes Linkage) override;
-
-  /// Emit outlined function specialized for the Fork-Join
-  /// programming model for applicable target directives on the NVPTX device.
-  /// \param D Directive to emit.
-  /// \param ParentName Name of the function that encloses the target region.
-  /// \param OutlinedFn Outlined function value to be defined by this call.
-  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
-  /// \param IsOffloadEntry True if the outlined function is an offload entry.
-  /// An outlined function may not be an entry if, e.g. the if clause always
-  /// evaluates to false.
-  void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
-                         llvm::Function *&OutlinedFn,
-                         llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
-                         const RegionCodeGenTy &CodeGen);
-
-  /// Emit outlined function specialized for the Single Program
-  /// Multiple Data programming model for applicable target directives on the
-  /// NVPTX device.
-  /// \param D Directive to emit.
-  /// \param ParentName Name of the function that encloses the target region.
-  /// \param OutlinedFn Outlined function value to be defined by this call.
-  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
-  /// \param IsOffloadEntry True if the outlined function is an offload entry.
-  /// \param CodeGen Object containing the target statements.
-  /// An outlined function may not be an entry if, e.g. the if clause always
-  /// evaluates to false.
-  void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
-                      llvm::Function *&OutlinedFn,
-                      llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
-                      const RegionCodeGenTy &CodeGen);
-
-  /// Emit outlined function for 'target' directive on the NVPTX
-  /// device.
-  /// \param D Directive to emit.
-  /// \param ParentName Name of the function that encloses the target region.
-  /// \param OutlinedFn Outlined function value to be defined by this call.
-  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
-  /// \param IsOffloadEntry True if the outlined function is an offload entry.
-  /// An outlined function may not be an entry if, e.g. the if clause always
-  /// evaluates to false.
-  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
-                                  StringRef ParentName,
-                                  llvm::Function *&OutlinedFn,
-                                  llvm::Constant *&OutlinedFnID,
-                                  bool IsOffloadEntry,
-                                  const RegionCodeGenTy &CodeGen) override;
-
-  /// Emits code for parallel or serial call of the \a OutlinedFn with
-  /// variables captured in a record which address is stored in \a
-  /// CapturedStruct.
-  /// This call is for the Non-SPMD Execution Mode.
-  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
-  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
-  /// \param CapturedVars A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  /// \param IfCond Condition in the associated 'if' clause, if it was
-  /// specified, nullptr otherwise.
-  void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
-                               llvm::Value *OutlinedFn,
-                               ArrayRef CapturedVars,
-                               const Expr *IfCond);
-
-  /// Emits code for parallel or serial call of the \a OutlinedFn with
-  /// variables captured in a record which address is stored in \a
-  /// CapturedStruct.
-  /// This call is for a parallel directive within an SPMD target directive.
-  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
-  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
-  /// \param CapturedVars A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  /// \param IfCond Condition in the associated 'if' clause, if it was
-  /// specified, nullptr otherwise.
-  ///
-  void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
-                            llvm::Function *OutlinedFn,
-                            ArrayRef CapturedVars,
-                            const Expr *IfCond);
-
-protected:
-  /// Get the function name of an outlined region.
-  //  The name can be customized depending on the target.
-  //
-  StringRef getOutlinedHelperName() const override {
-    return "__omp_outlined__";
-  }
-
-  /// Check if the default location must be constant.
-  /// Constant for NVPTX for better optimization.
-  bool isDefaultLocationConstant() const override { return true; }
-
-  /// Returns additional flags that can be stored in reserved_2 field of the
-  /// default location.
-  /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
-  /// Full/Lightweight runtime mode. Used for better optimization.
-  unsigned getDefaultLocationReserved2Flags() const override;
+class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntimeGPU {
 
 public:
   explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
-  void clear() override;
-
-  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
-  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
-  virtual void emitProcBindClause(CodeGenFunction &CGF,
-                                  llvm::omp::ProcBindKind ProcBind,
-                                  SourceLocation Loc) override;
-
-  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
-  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
-  /// clause.
-  /// \param NumThreads An integer value of threads.
-  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
-                                    llvm::Value *NumThreads,
-                                    SourceLocation Loc) override;
-
-  /// This function ought to emit, in the general case, a call to
-  // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
-  // as these numbers are obtained through the PTX grid and block configuration.
-  /// \param NumTeams An integer expression of teams.
-  /// \param ThreadLimit An integer expression of threads.
-  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
-                          const Expr *ThreadLimit, SourceLocation Loc) override;
-
-  /// Emits inlined function for the specified OpenMP parallel
-  //  directive.
-  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
-  /// kmp_int32 BoundID, struct context_vars*).
-  /// \param D OpenMP directive.
-  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
-  /// \param InnermostKind Kind of innermost directive (for simple directives it
-  /// is a directive itself, for combined - its innermost directive).
-  /// \param CodeGen Code generation sequence for the \a D directive.
-  llvm::Function *
-  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
-                               const VarDecl *ThreadIDVar,
-                               OpenMPDirectiveKind InnermostKind,
-                               const RegionCodeGenTy &CodeGen) override;
-
-  /// Emits inlined function for the specified OpenMP teams
-  //  directive.
-  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
-  /// kmp_int32 BoundID, struct context_vars*).
-  /// \param D OpenMP directive.
-  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
-  /// \param InnermostKind Kind of innermost directive (for simple directives it
-  /// is a directive itself, for combined - its innermost directive).
-  /// \param CodeGen Code generation sequence for the \a D directive.
-  llvm::Function *
-  emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
-                            const VarDecl *ThreadIDVar,
-                            OpenMPDirectiveKind InnermostKind,
-                            const RegionCodeGenTy &CodeGen) override;
-
-  /// Emits code for teams call of the \a OutlinedFn with
-  /// variables captured in a record which address is stored in \a
-  /// CapturedStruct.
-  /// \param OutlinedFn Outlined function to be run by team masters. Type of
-  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
-  /// \param CapturedVars A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  ///
-  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
-                     SourceLocation Loc, llvm::Function *OutlinedFn,
-                     ArrayRef CapturedVars) override;
-
-  /// Emits code for parallel or serial call of the \a OutlinedFn with
-  /// variables captured in a record which address is stored in \a
-  /// CapturedStruct.
-  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
-  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
-  /// \param CapturedVars A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  /// \param IfCond Condition in the associated 'if' clause, if it was
-  /// specified, nullptr otherwise.
-  void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
-                        llvm::Function *OutlinedFn,
-                        ArrayRef CapturedVars,
-                        const Expr *IfCond) override;
-
-  /// Emit an implicit/explicit barrier for OpenMP threads.
-  /// \param Kind Directive for which this implicit barrier call must be
-  /// generated. Must be OMPD_barrier for explicit barrier generation.
-  /// \param EmitChecks true if need to emit checks for cancellation barriers.
-  /// \param ForceSimpleCall true simple barrier call must be emitted, false if
-  /// runtime class decides which one to emit (simple or with cancellation
-  /// checks).
-  ///
-  void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                       OpenMPDirectiveKind Kind, bool EmitChecks = true,
-                       bool ForceSimpleCall = false) override;
-
-  /// Emits a critical region.
-  /// \param CriticalName Name of the critical region.
-  /// \param CriticalOpGen Generator for the statement associated with the given
-  /// critical region.
-  /// \param Hint Value of the 'hint' clause (optional).
-  void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
-                          const RegionCodeGenTy &CriticalOpGen,
-                          SourceLocation Loc,
-                          const Expr *Hint = nullptr) override;
-
-  /// Emit a code for reduction clause.
-  ///
-  /// \param Privates List of private copies for original reduction arguments.
-  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
-  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
-  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
-  /// or 'operator binop(LHS, RHS)'.
-  /// \param Options List of options for reduction codegen:
-  ///     WithNowait true if parent directive has also nowait clause, false
-  ///     otherwise.
-  ///     SimpleReduction Emit reduction operation only. Used for omp simd
-  ///     directive on the host.
-  ///     ReductionKind The kind of reduction to perform.
-  virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
-                             ArrayRef Privates,
-                             ArrayRef LHSExprs,
-                             ArrayRef RHSExprs,
-                             ArrayRef ReductionOps,
-                             ReductionOptionsTy Options) override;
-
-  /// Returns specified OpenMP runtime function for the current OpenMP
-  /// implementation.  Specialized for the NVPTX device.
-  /// \param Function OpenMP runtime function.
-  /// \return Specified function.
-  llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
-
-  /// Translates the native parameter of outlined function if this is required
-  /// for target.
-  /// \param FD Field decl from captured record for the parameter.
-  /// \param NativeParam Parameter itself.
-  const VarDecl *translateParameter(const FieldDecl *FD,
-                                    const VarDecl *NativeParam) const override;
-
-  /// Gets the address of the native argument basing on the address of the
-  /// target-specific parameter.
-  /// \param NativeParam Parameter itself.
-  /// \param TargetParam Corresponding target-specific parameter.
-  Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
-                              const VarDecl *TargetParam) const override;
-
-  /// Emits call of the outlined function with the provided arguments,
-  /// translating these arguments to correct target-specific arguments.
-  void emitOutlinedFunctionCall(
-      CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
-      ArrayRef Args = llvm::None) const override;
-
-  /// Emits OpenMP-specific function prolog.
-  /// Required for device constructs.
-  void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
-
-  /// Gets the OpenMP-specific address of the local variable.
-  Address getAddressOfLocalVariable(CodeGenFunction &CGF,
-                                    const VarDecl *VD) override;
-
-  /// Target codegen is specialized based on two data-sharing modes: CUDA, in
-  /// which the local variables are actually global threadlocal, and Generic, in
-  /// which the local variables are placed in global memory if they may escape
-  /// their declaration context.
-  enum DataSharingMode {
-    /// CUDA data sharing mode.
-    CUDA,
-    /// Generic data-sharing mode.
-    Generic,
-  };
-
-  /// Cleans up references to the objects in finished function.
-  ///
-  void functionFinished(CodeGenFunction &CGF) override;
-
-  /// Choose a default value for the dist_schedule clause.
-  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
-      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const override;
-
-  /// Choose a default value for the schedule clause.
-  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
-      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      const Expr *&ChunkExpr) const override;
-
-  /// Adjust some parameters for the target-based directives, like addresses of
-  /// the variables captured by reference in lambdas.
-  void adjustTargetSpecificDataForLambdas(
-      CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
-
-  /// Perform check on requires decl to ensure that target architecture
-  /// supports unified addressing
-  void processRequiresDirective(const OMPRequiresDecl *D) override;
-
-  /// Returns default address space for the constant firstprivates, __constant__
-  /// address space by default.
-  unsigned getDefaultFirstprivateAddressSpace() const override;
-
-  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
-  /// the predefined allocator and translates it into the corresponding address
-  /// space.
-  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
-
-private:
-  /// Track the execution mode when codegening directives within a target
-  /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
-  /// target region and used by containing directives such as 'parallel'
-  /// to emit optimized code.
-  ExecutionMode CurrentExecutionMode = EM_Unknown;
-
-  /// Check if the full runtime is required (default - yes).
-  bool RequiresFullRuntime = true;
-
-  /// true if we're emitting the code for the target region and next parallel
-  /// region is L0 for sure.
-  bool IsInTargetMasterThreadRegion = false;
-  /// true if currently emitting code for target/teams/distribute region, false
-  /// - otherwise.
-  bool IsInTTDRegion = false;
-  /// true if we're definitely in the parallel region.
-  bool IsInParallelRegion = false;
-
-  /// Map between an outlined function and its wrapper.
-  llvm::DenseMap WrapperFunctionsMap;
-
-  /// Emit function which wraps the outline parallel region
-  /// and controls the parameters which are passed to this function.
-  /// The wrapper ensures that the outlined function is called
-  /// with the correct arguments when data is shared.
-  llvm::Function *createParallelDataSharingWrapper(
-      llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
-
-  /// The data for the single globalized variable.
-  struct MappedVarData {
-    /// Corresponding field in the global record.
-    const FieldDecl *FD = nullptr;
-    /// Corresponding address.
-    Address PrivateAddr = Address::invalid();
-    /// true, if only one element is required (for latprivates in SPMD mode),
-    /// false, if need to create based on the warp-size.
-    bool IsOnePerTeam = false;
-    MappedVarData() = delete;
-    MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false)
-        : FD(FD), IsOnePerTeam(IsOnePerTeam) {}
-  };
-  /// The map of local variables to their addresses in the global memory.
-  using DeclToAddrMapTy = llvm::MapVector;
-  /// Set of the parameters passed by value escaping OpenMP context.
-  using EscapedParamsTy = llvm::SmallPtrSet;
-  struct FunctionData {
-    DeclToAddrMapTy LocalVarData;
-    llvm::Optional SecondaryLocalVarData = llvm::None;
-    EscapedParamsTy EscapedParameters;
-    llvm::SmallVector EscapedVariableLengthDecls;
-    llvm::SmallVector EscapedVariableLengthDeclsAddrs;
-    const RecordDecl *GlobalRecord = nullptr;
-    llvm::Optional SecondaryGlobalRecord = llvm::None;
-    llvm::Value *GlobalRecordAddr = nullptr;
-    llvm::Value *IsInSPMDModeFlag = nullptr;
-    std::unique_ptr MappedParams;
-  };
-  /// Maps the function to the list of the globalized variables with their
-  /// addresses.
-  llvm::SmallDenseMap FunctionGlobalizedDecls;
-  /// List of records for the globalized variables in target/teams/distribute
-  /// contexts. Inner records are going to be joined into the single record,
-  /// while those resulting records are going to be joined into the single
-  /// union. This resulting union (one per CU) is the entry point for the static
-  /// memory management runtime functions.
-  struct GlobalPtrSizeRecsTy {
-    llvm::GlobalVariable *UseSharedMemory = nullptr;
-    llvm::GlobalVariable *RecSize = nullptr;
-    llvm::GlobalVariable *Buffer = nullptr;
-    SourceLocation Loc;
-    llvm::SmallVector Records;
-    unsigned RegionCounter = 0;
-  };
-  llvm::SmallVector GlobalizedRecords;
-  llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
-  /// List of the records with the list of fields for the reductions across the
-  /// teams. Used to build the intermediate buffer for the fast teams
-  /// reductions.
-  /// All the records are gathered into a union `union.type` is created.
-  llvm::SmallVector TeamsReductions;
-  /// Shared pointer for the global memory in the global memory buffer used for
-  /// the given kernel.
-  llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
-  /// Pair of the Non-SPMD team and all reductions variables in this team
-  /// region.
-  std::pair>
-      TeamAndReductions;
+  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
 };
 
 } // CodeGen namespace.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 7135135d2a410..0ee1133ebaa16 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4111,29 +4111,34 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
   VarDecl *BPVD = nullptr;
   VarDecl *PVD = nullptr;
   VarDecl *SVD = nullptr;
+  VarDecl *MVD = nullptr;
   if (InputInfo.NumberOfTargetItems > 0) {
     auto *CD = CapturedDecl::Create(
         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
-    QualType BaseAndPointersType = getContext().getConstantArrayType(
+    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
         getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
         /*IndexTypeQuals=*/0);
     BPVD = createImplicitFirstprivateForType(
-        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
+        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
     PVD = createImplicitFirstprivateForType(
-        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
+        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
     QualType SizesType = getContext().getConstantArrayType(
         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
         ArrSize, nullptr, ArrayType::Normal,
         /*IndexTypeQuals=*/0);
     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                             S.getBeginLoc());
+    MVD = createImplicitFirstprivateForType(
+        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
     TargetScope.addPrivate(
         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
     TargetScope.addPrivate(PVD,
                            [&InputInfo]() { return InputInfo.PointersArray; });
     TargetScope.addPrivate(SVD,
                            [&InputInfo]() { return InputInfo.SizesArray; });
+    TargetScope.addPrivate(MVD,
+                           [&InputInfo]() { return InputInfo.MappersArray; });
   }
   (void)TargetScope.Privatize();
   // Build list of dependences.
@@ -4142,7 +4147,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
   }
-  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
+  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
     // Set proper addresses for generated private copies.
     OMPPrivateScope Scope(CGF);
@@ -4183,6 +4188,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
+      InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
+          CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
     }
 
     Action.Enter(CGF);
@@ -6077,6 +6084,7 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
         (void)PrivateScope.Privatize();
         RCG(CGF);
       } else {
+        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
         RCG(CGF);
       }
     };
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index 21e1c0182f84f..46e019be63287 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -27,15 +27,6 @@ set(LLVM_LINK_COMPONENTS
   TransformUtils
   )
 
-# In a standard Clang+LLVM build, we need to generate intrinsics before
-# building codegen. In a standalone build, LLVM is already built and we don't
-# need this dependency. Furthermore, LLVM doesn't export it so we can't have
-# this dependency.
-set(codegen_deps intrinsics_gen)
-if (CLANG_BUILT_STANDALONE)
-  set(codegen_deps)
-endif()
-
 if (MSVC)
   set_source_files_properties(CodeGenModule.cpp PROPERTIES COMPILE_FLAGS /bigobj)
 endif()
@@ -72,6 +63,7 @@ add_clang_library(clangCodeGen
   CGObjCRuntime.cpp
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
+  CGOpenMPRuntimeGPU.cpp
   CGOpenMPRuntimeNVPTX.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
@@ -100,7 +92,7 @@ add_clang_library(clangCodeGen
   VarBypassDetector.cpp
 
   DEPENDS
-  ${codegen_deps}
+  intrinsics_gen
 
   LINK_LIBS
   clangAnalysis
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index bf6ece0356ad2..854f9e53f281b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -124,6 +124,7 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1)             \
   SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0)                  \
   SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0)                          \
+  SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0)                       \
   SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0)                     \
   SANITIZER_CHECK(MissingReturn, missing_return, 0)                            \
   SANITIZER_CHECK(MulOverflow, mul_overflow, 0)                                \
@@ -264,6 +265,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   CodeGenModule &CGM;  // Per-module state.
   const TargetInfo &Target;
 
+  // For EH/SEH outlined funclets, this field points to parent's CGF
+  CodeGenFunction *ParentCGF = nullptr;
+
   typedef std::pair ComplexPairTy;
   LoopInfoStack LoopStack;
   CGBuilderTy Builder;
@@ -3330,12 +3334,15 @@ class CodeGenFunction : public CodeGenTypeCache {
     Address BasePointersArray = Address::invalid();
     Address PointersArray = Address::invalid();
     Address SizesArray = Address::invalid();
+    Address MappersArray = Address::invalid();
     unsigned NumberOfTargetItems = 0;
     explicit OMPTargetDataInfo() = default;
     OMPTargetDataInfo(Address BasePointersArray, Address PointersArray,
-                      Address SizesArray, unsigned NumberOfTargetItems)
+                      Address SizesArray, Address MappersArray,
+                      unsigned NumberOfTargetItems)
         : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
-          SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {}
+          SizesArray(SizesArray), MappersArray(MappersArray),
+          NumberOfTargetItems(NumberOfTargetItems) {}
   };
   void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S,
                                        const RegionCodeGenTy &BodyGen,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 9cf54a509a2e1..3d633e35fdcd1 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1252,6 +1252,9 @@ void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority,
 /// when the module is unloaded.
 void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) {
   if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) {
+    if (getCXXABI().useSinitAndSterm())
+      llvm::report_fatal_error(
+          "register global dtors with atexit() is not supported yet");
     DtorsUsingAtExit[Priority].push_back(Dtor);
     return;
   }
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 547680dfbb55f..b0822b9ef16a9 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -4572,7 +4572,8 @@ void XLCXXABI::emitCXXStermFinalizer(const VarDecl &D, llvm::Function *dtorStub,
   CodeGenFunction CGF(CGM);
 
   CGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, StermFinalizer, FI,
-                    FunctionArgList());
+                    FunctionArgList(), D.getLocation(),
+                    D.getInit()->getExprLoc());
 
   // The unatexit subroutine unregisters __dtor functions that were previously
   // registered by the atexit subroutine. If the referenced function is found,
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 35d14186c866e..56ca80cef827b 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -1157,17 +1157,6 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) {
   // FIXME: Handle environment options which affect driver behavior, somewhere
   // (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
 
-  if (Optional CompilerPathValue =
-          llvm::sys::Process::GetEnv("COMPILER_PATH")) {
-    StringRef CompilerPath = *CompilerPathValue;
-    while (!CompilerPath.empty()) {
-      std::pair Split =
-          CompilerPath.split(llvm::sys::EnvPathSeparator);
-      PrefixDirs.push_back(std::string(Split.first));
-      CompilerPath = Split.second;
-    }
-  }
-
   // We look for the driver mode option early, because the mode can affect
   // how other options are parsed.
   ParseDriverMode(ClangExecutable, ArgList.slice(1));
@@ -1296,6 +1285,16 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) {
     A->claim();
     PrefixDirs.push_back(A->getValue(0));
   }
+  if (Optional CompilerPathValue =
+          llvm::sys::Process::GetEnv("COMPILER_PATH")) {
+    StringRef CompilerPath = *CompilerPathValue;
+    while (!CompilerPath.empty()) {
+      std::pair Split =
+          CompilerPath.split(llvm::sys::EnvPathSeparator);
+      PrefixDirs.push_back(std::string(Split.first));
+      CompilerPath = Split.second;
+    }
+  }
   if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
     SysRoot = A->getValue();
   if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
@@ -2008,6 +2007,13 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
   if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
     llvm::outs() << "programs: =";
     bool separator = false;
+    // Print -B and COMPILER_PATH.
+    for (const std::string &Path : PrefixDirs) {
+      if (separator)
+        llvm::outs() << llvm::sys::EnvPathSeparator;
+      llvm::outs() << Path;
+      separator = true;
+    }
     for (const std::string &Path : TC.getProgramPaths()) {
       if (separator)
         llvm::outs() << llvm::sys::EnvPathSeparator;
@@ -6297,8 +6303,7 @@ void Driver::generatePrefixedToolNames(
     Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());
 }
 
-static bool ScanDirForExecutable(SmallString<128> &Dir,
-                                 const std::string &Name) {
+static bool ScanDirForExecutable(SmallString<128> &Dir, StringRef Name) {
   llvm::sys::path::append(Dir, Name);
   if (llvm::sys::fs::can_execute(Twine(Dir)))
     return true;
@@ -6315,9 +6320,8 @@ std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {
   for (const auto &PrefixDir : PrefixDirs) {
     if (llvm::sys::fs::is_directory(PrefixDir)) {
       SmallString<128> P(PrefixDir);
-      for (const auto &TargetSpecificExecutable : TargetSpecificExecutables)
-        if (ScanDirForExecutable(P, TargetSpecificExecutable))
-          return std::string(P.str());
+      if (ScanDirForExecutable(P, Name))
+        return std::string(P.str());
     } else {
       SmallString<128> P((PrefixDir + Name).str());
       if (llvm::sys::fs::can_execute(Twine(P)))
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 0b81152d57f6b..bcc9ffc7ff8f6 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -27,7 +27,8 @@ using namespace llvm::opt;
 static const SanitizerMask NeedsUbsanRt =
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
-    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero;
+    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask NeedsUbsanCxxRt =
     SanitizerKind::Vptr | SanitizerKind::CFI;
 static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr;
@@ -48,11 +49,11 @@ static const SanitizerMask SupportsCoverage =
     SanitizerKind::DataFlow | SanitizerKind::Fuzzer |
     SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero |
     SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack |
-    SanitizerKind::Thread;
+    SanitizerKind::Thread | SanitizerKind::ObjCCast;
 static const SanitizerMask RecoverableByDefault =
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
-    SanitizerKind::FloatDivideByZero;
+    SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast;
 static const SanitizerMask Unrecoverable =
     SanitizerKind::Unreachable | SanitizerKind::Return;
 static const SanitizerMask AlwaysRecoverable =
@@ -62,7 +63,8 @@ static const SanitizerMask TrappingSupported =
     (SanitizerKind::Undefined & ~SanitizerKind::Vptr) |
     SanitizerKind::UnsignedIntegerOverflow | SanitizerKind::ImplicitConversion |
     SanitizerKind::Nullability | SanitizerKind::LocalBounds |
-    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero;
+    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask TrappingDefault = SanitizerKind::CFI;
 static const SanitizerMask CFIClasses =
     SanitizerKind::CFIVCall | SanitizerKind::CFINVCall |
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 52fa49ca20ffb..30a64b1de34a9 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -689,9 +689,7 @@ bool ToolChain::isThreadModelSupported(const StringRef Model) const {
     return Triple.getArch() == llvm::Triple::arm ||
            Triple.getArch() == llvm::Triple::armeb ||
            Triple.getArch() == llvm::Triple::thumb ||
-           Triple.getArch() == llvm::Triple::thumbeb ||
-           Triple.getArch() == llvm::Triple::wasm32 ||
-           Triple.getArch() == llvm::Triple::wasm64;
+           Triple.getArch() == llvm::Triple::thumbeb || Triple.isWasm();
   } else if (Model == "posix")
     return true;
 
@@ -1057,9 +1055,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
                       SanitizerKind::Nullability | SanitizerKind::LocalBounds;
   if (getTriple().getArch() == llvm::Triple::x86 ||
       getTriple().getArch() == llvm::Triple::x86_64 ||
-      getTriple().getArch() == llvm::Triple::arm ||
-      getTriple().getArch() == llvm::Triple::wasm32 ||
-      getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64())
+      getTriple().getArch() == llvm::Triple::arm || getTriple().isWasm() ||
+      getTriple().isAArch64())
     Res |= SanitizerKind::CFIICall;
   if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64())
     Res |= SanitizerKind::ShadowCallStack;
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index cfc71d7810b46..bc6d1fcd4a008 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -489,9 +489,9 @@ bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
 /// ROCM Toolchain
 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
                              const ArgList &Args)
-    : AMDGPUToolChain(D, Triple, Args),
-      RocmInstallation(D, Triple, Args, /*DetectHIPRuntime=*/false,
-                       /*DetectDeviceLib=*/true) {}
+    : AMDGPUToolChain(D, Triple, Args) {
+  RocmInstallation.detectDeviceLibrary();
+}
 
 void AMDGPUToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 71c66188b0456..5d44faf28b053 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -90,9 +90,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
 };
 
 class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
-protected:
-  RocmInstallationDetector RocmInstallation;
-
 public:
   ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
                 const llvm::opt::ArgList &Args);
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 4c198a6037a8b..428b72a489041 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -365,6 +365,16 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
     }
   }
 
+  auto V8_6Pos = llvm::find(Features, "+v8.6a");
+  if (V8_6Pos != std::end(Features))
+    V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
+
+  bool HasSve = llvm::is_contained(Features, "+sve");
+  // -msve_vector_bits= flag is valid only if SVE is enabled.
+  if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ))
+    if (!HasSve)
+      D.Diag(diag::err_drv_invalid_sve_vector_bits);
+
   if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
                                options::OPT_munaligned_access))
     if (A->getOption().matches(options::OPT_mno_unaligned_access))
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index 8659ebf17a722..09ae4538b3acc 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -89,7 +89,7 @@ static bool getExtensionVersion(const Driver &D, const ArgList &Args,
 
   if (Major.size() && In.consume_front("p")) {
     Minor = std::string(In.take_while(isDigit));
-    In = In.substr(Major.size());
+    In = In.substr(Major.size() + 1);
 
     // Expected 'p' to be followed by minor version number.
     if (Minor.empty()) {
@@ -101,6 +101,16 @@ static bool getExtensionVersion(const Driver &D, const ArgList &Args,
     }
   }
 
+  // Expected multi-character extension with version number to have no
+  // subsequent characters (i.e. must either end string or be followed by
+  // an underscore).
+  if (Ext.size() > 1 && In.size()) {
+    std::string Error =
+        "multi-character extensions must be separated by underscores";
+    D.Diag(diag::err_drv_invalid_riscv_ext_arch_name) << MArch << Error << In;
+    return false;
+  }
+
+  // If experimental extension, require use of current version number
   if (auto ExperimentalExtension = isExperimentalExtension(Ext)) {
     if (!Args.hasArg(options::OPT_menable_experimental_extensions)) {
@@ -436,6 +446,19 @@ static bool getArchFeatures(const Driver &D, StringRef MArch,
   return true;
 }
 
+// Get features except standard extension feature
+static void getRISCFeaturesFromMcpu(const Driver &D, const llvm::Triple &Triple,
+                                    const llvm::opt::ArgList &Args,
+                                    const llvm::opt::Arg *A, StringRef Mcpu,
+                                    std::vector &Features) {
+  bool Is64Bit = (Triple.getArch() == llvm::Triple::riscv64);
+  llvm::RISCV::CPUKind CPUKind = llvm::RISCV::parseCPUKind(Mcpu);
+  if (!llvm::RISCV::checkCPUKind(CPUKind, Is64Bit) ||
+      !llvm::RISCV::getCPUFeaturesExceptStdExt(CPUKind, Features)) {
+    D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args);
+  }
+}
+
 void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                    const ArgList &Args,
                                    std::vector &Features) {
@@ -444,6 +467,11 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   if (!getArchFeatures(D, MArch, Features, Args))
     return;
 
+  // If users give march and mcpu, get std extension feature from MArch
+  // and other features (ex. micro architecture feature) from mcpu
+  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
+    getRISCFeaturesFromMcpu(D, Triple, Args, A, A->getValue(), Features);
+
   // Handle features corresponding to "-ffixed-X" options
   if (Args.hasArg(options::OPT_ffixed_x1))
     Features.push_back("+reserve-x1");
@@ -533,11 +561,9 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
 
   // GCC's logic around choosing a default `-mabi=` is complex. If GCC is not
   // configured using `--with-abi=`, then the logic for the default choice is
-  // defined in config.gcc. This function is based on the logic in GCC 9.2.0. We
-  // deviate from GCC's default only on baremetal targets (UnknownOS) where
-  // neither `-march` nor `-mabi` is specified.
+  // defined in config.gcc. This function is based on the logic in GCC 9.2.0.
   //
-  // The logic uses the following, in order:
+  // The logic used in GCC 9.2.0 is the following, in order:
   // 1. Explicit choices using `--with-abi=`
   // 2. A default based on `--with-arch=`, if provided
   // 3. A default based on the target triple's arch
@@ -546,38 +572,40 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
   //
   // Clang does not have `--with-arch=` or `--with-abi=`, so we use `-march=`
   // and `-mabi=` respectively instead.
+  //
+  // In order to make the choosing logic more clear, Clang uses the following
+  // in order:
+  // 1. Explicit choices using `-mabi=`
+  // 2. A default based on the architecture as determined by getRISCVArch
+  // 3. Choose a default based on the triple
 
   // 1. If `-mabi=` is specified, use it.
   if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
     return A->getValue();
 
-  // 2. Choose a default based on `-march=`
+  // 2. Choose a default based on the target architecture.
   //
   // rv32g | rv32*d -> ilp32d
   // rv32e -> ilp32e
   // rv32* -> ilp32
   // rv64g | rv64*d -> lp64d
   // rv64* -> lp64
-  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
-    StringRef MArch = A->getValue();
-
-    if (MArch.startswith_lower("rv32")) {
-      // FIXME: parse `March` to find `D` extension properly
-      if (MArch.substr(4).contains_lower("d") ||
-          MArch.startswith_lower("rv32g"))
-        return "ilp32d";
-      else if (MArch.startswith_lower("rv32e"))
-        return "ilp32e";
-      else
-        return "ilp32";
-    } else if (MArch.startswith_lower("rv64")) {
-      // FIXME: parse `March` to find `D` extension properly
-      if (MArch.substr(4).contains_lower("d") ||
-          MArch.startswith_lower("rv64g"))
-        return "lp64d";
-      else
-        return "lp64";
-    }
+  StringRef MArch = getRISCVArch(Args, Triple);
+
+  if (MArch.startswith_lower("rv32")) {
+    // FIXME: parse `March` to find `D` extension properly
+    if (MArch.substr(4).contains_lower("d") || MArch.startswith_lower("rv32g"))
+      return "ilp32d";
+    else if (MArch.startswith_lower("rv32e"))
+      return "ilp32e";
+    else
+      return "ilp32";
+  } else if (MArch.startswith_lower("rv64")) {
+    // FIXME: parse `March` to find `D` extension properly
+    if (MArch.substr(4).contains_lower("d") || MArch.startswith_lower("rv64g"))
+      return "lp64d";
+    else
+      return "lp64";
   }
 
   // 3. Choose a default based on the triple
@@ -607,10 +635,11 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args,
   // GCC's logic around choosing a default `-march=` is complex. If GCC is not
   // configured using `--with-arch=`, then the logic for the default choice is
   // defined in config.gcc. This function is based on the logic in GCC 9.2.0. We
-  // deviate from GCC's default only on baremetal targets (UnknownOS) where
-  // neither `-march` nor `-mabi` is specified.
+  // deviate from GCC's default on additional `-mcpu` option (GCC does not
+  // support `-mcpu`) and baremetal targets (UnknownOS) where neither `-march`
+  // nor `-mabi` is specified.
   //
-  // The logic uses the following, in order:
+  // The logic used in GCC 9.2.0 is the following, in order:
   // 1. Explicit choices using `--with-arch=`
   // 2. A default based on `--with-abi=`, if provided
   // 3. A default based on the target triple's arch
@@ -620,6 +649,12 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args,
   // Clang does not have `--with-arch=` or `--with-abi=`, so we use `-march=`
   // and `-mabi=` respectively instead.
   //
+  // Clang uses the following logic, in order:
+  // 1. Explicit choices using `-march=`
+  // 2. Based on `-mcpu` if the target CPU has a default ISA string
+  // 3. A default based on `-mabi`, if provided
+  // 4. A default based on the target triple's arch
+  //
   // Clang does not yet support MULTILIB_REUSE, so we use `rv{XLEN}imafdc`
   // instead of `rv{XLEN}gc` though they are (currently) equivalent.
 
@@ -627,7 +662,15 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args,
   if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
     return A->getValue();
 
-  // 2. Choose a default based on `-mabi=`
+  // 2. Get march (isa string) based on `-mcpu=`
+  if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+    StringRef MArch = llvm::RISCV::getMArchFromMcpu(A->getValue());
+    // Skip this step if the target CPU has no default march (empty string).
+    if (MArch != "")
+      return MArch;
+  }
+
+  // 3. Choose a default based on `-mabi=`
   //
   // ilp32e -> rv32e
   // ilp32 | ilp32f | ilp32d -> rv32imafdc
@@ -643,7 +686,7 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args,
       return "rv64imafdc";
   }
 
-  // 3. Choose a default based on the triple
+  // 4. Choose a default based on the triple
   //
   // We deviate from GCC's defaults here:
   // - On `riscv{XLEN}-unknown-elf` we default to `rv{XLEN}imac`
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index aa95c4189d1e2..2cc44c09917f5 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -94,6 +94,7 @@ const char *x86::getX86TargetCPU(const ArgList &Args,
 
   switch (Triple.getOS()) {
   case llvm::Triple::FreeBSD:
+    return "i686";
   case llvm::Triple::NetBSD:
   case llvm::Triple::OpenBSD:
     return "i486";
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index f1cac440453be..af00b1a3b92be 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1758,6 +1758,21 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
     if (IndirectBranches)
       CmdArgs.push_back("-mbranch-target-enforce");
   }
+
+  // Handle -msve-vector-bits=
+  if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
+    StringRef Val = A->getValue();
+    const Driver &D = getToolChain().getDriver();
+    if (!Val.equals("128") && !Val.equals("256") && !Val.equals("512") &&
+        !Val.equals("1024") && !Val.equals("2048")) {
+      // Handle the unsupported values passed to msve-vector-bits.
+      D.Diag(diag::err_drv_unsupported_option_argument)
+          << A->getOption().getName() << Val;
+    } else if (A->getOption().matches(options::OPT_msve_vector_bits_EQ)) {
+      CmdArgs.push_back(
+          Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+    }
+  }
 }
 
 void Clang::AddMIPSTargetArgs(const ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 120143113e9f8..1c612d36269d9 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -341,6 +341,11 @@ std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
 
     return TargetCPUName;
   }
+  case llvm::Triple::riscv32:
+  case llvm::Triple::riscv64:
+    if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
+      return A->getValue();
+    return "";
 
   case llvm::Triple::bpfel:
   case llvm::Triple::bpfeb:
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 6bf42e6029eb5..f910c88fa9674 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -779,7 +779,7 @@ MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
 /// Darwin - Darwin tool chain for i386 and x86_64.
 Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     : MachO(D, Triple, Args), TargetInitialized(false),
-      CudaInstallation(D, Triple, Args) {}
+      CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) {}
 
 types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
   types::ID Ty = ToolChain::LookupTypeForExtension(Ext);
@@ -831,6 +831,11 @@ void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs,
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+void Darwin::AddHIPIncludeArgs(const ArgList &DriverArgs,
+                               ArgStringList &CC1Args) const {
+  RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
+}
+
 // This is just a MachO name translation routine and there's no
 // way to join this into ARMTargetParser without breaking all
 // other assumptions. Maybe MachO should consider standardising
@@ -1191,7 +1196,8 @@ void Darwin::addProfileRTLibs(const ArgList &Args,
   // runtime's functionality.
   if (hasExportSymbolDirective(Args)) {
     if (ForGCOV) {
-      addExportedSymbol(CmdArgs, "___gcov_flush");
+      addExportedSymbol(CmdArgs, "___gcov_dump");
+      addExportedSymbol(CmdArgs, "___gcov_reset");
       addExportedSymbol(CmdArgs, "_flush_fn_list");
       addExportedSymbol(CmdArgs, "_writeout_fn_list");
       addExportedSymbol(CmdArgs, "_reset_fn_list");
@@ -2716,6 +2722,7 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
   Res |= SanitizerKind::Fuzzer;
   Res |= SanitizerKind::FuzzerNoLink;
   Res |= SanitizerKind::Function;
+  Res |= SanitizerKind::ObjCCast;
 
   // Prior to 10.9, macOS shipped a version of the C++ standard library without
   // C++11 support. The same is true of iOS prior to version 5. These OS'es are
@@ -2736,4 +2743,5 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
 
 void Darwin::printVerboseInfo(raw_ostream &OS) const {
   CudaInstallation.print(OS);
+  RocmInstallation.print(OS);
 }
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index a543a8fc27b9d..64c252efea7df 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_DARWIN_H
 
 #include "Cuda.h"
+#include "ROCm.h"
 #include "clang/Driver/DarwinSDKInfo.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
@@ -293,6 +294,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public MachO {
   mutable Optional SDKInfo;
 
   CudaInstallationDetector CudaInstallation;
+  RocmInstallationDetector RocmInstallation;
 
 private:
   void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const;
@@ -475,6 +477,8 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public MachO {
 
   void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                           llvm::opt::ArgStringList &CC1Args) const override;
+  void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                         llvm::opt::ArgStringList &CC1Args) const override;
 
   bool UseObjCMixedDispatch() const override {
     // This is only used with the non-fragile ABI and non-legacy dispatch.
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp
index 14cf278c19d9e..909ac5e992129 100644
--- a/clang/lib/Driver/ToolChains/FreeBSD.cpp
+++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp
@@ -425,6 +425,11 @@ void FreeBSD::AddCudaIncludeArgs(const ArgList &DriverArgs,
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+void FreeBSD::AddHIPIncludeArgs(const ArgList &DriverArgs,
+                                ArgStringList &CC1Args) const {
+  RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
+}
+
 Tool *FreeBSD::buildAssembler() const {
   return new tools::freebsd::Assembler(*this);
 }
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.h b/clang/lib/Driver/ToolChains/FreeBSD.h
index bca3f6b741b6a..abc0876cef260 100644
--- a/clang/lib/Driver/ToolChains/FreeBSD.h
+++ b/clang/lib/Driver/ToolChains/FreeBSD.h
@@ -68,6 +68,8 @@ class LLVM_LIBRARY_VISIBILITY FreeBSD : public Generic_ELF {
                            llvm::opt::ArgStringList &CmdArgs) const override;
   void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                           llvm::opt::ArgStringList &CC1Args) const override;
+  void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                         llvm::opt::ArgStringList &CC1Args) const override;
 
   llvm::ExceptionHandling
   GetExceptionModel(const llvm::opt::ArgList &Args) const override;
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index e34bddc4c76fd..e476f296b0caa 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -1620,15 +1620,21 @@ static bool findMSP430Multilibs(const Driver &D,
                                 StringRef Path, const ArgList &Args,
                                 DetectedMultilibs &Result) {
   FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
-  Multilib MSP430Multilib = makeMultilib("/430");
+  Multilib WithoutExceptions = makeMultilib("/430").flag("-exceptions");
+  Multilib WithExceptions = makeMultilib("/430/exceptions").flag("+exceptions");
+
   // FIXME: when clang starts to support msp430x ISA additional logic
   // to select between multilib must be implemented
   // Multilib MSP430xMultilib = makeMultilib("/large");
 
-  Result.Multilibs.push_back(MSP430Multilib);
+  Result.Multilibs.push_back(WithoutExceptions);
+  Result.Multilibs.push_back(WithExceptions);
   Result.Multilibs.FilterOut(NonExistent);
 
   Multilib::flags_list Flags;
+  addMultilibFlag(Args.hasFlag(options::OPT_fexceptions,
+                               options::OPT_fno_exceptions, false),
+                  "exceptions", Flags);
   if (Result.Multilibs.select(Flags, Result.SelectedMultilib))
     return true;
 
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 32734f5c11809..7d17f809690ea 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -224,7 +224,6 @@ HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
   // Lookup binaries into the driver directory, this is used to
   // discover the clang-offload-bundler executable.
   getProgramPaths().push_back(getDriver().Dir);
-  RocmInstallation.detectHIPRuntime();
 }
 
 void HIPToolChain::addClangTargetOptions(
diff --git a/clang/lib/Driver/ToolChains/MSP430.cpp b/clang/lib/Driver/ToolChains/MSP430.cpp
index b0bc2e014b487..6d663e4909e59 100644
--- a/clang/lib/Driver/ToolChains/MSP430.cpp
+++ b/clang/lib/Driver/ToolChains/MSP430.cpp
@@ -128,7 +128,7 @@ MSP430ToolChain::MSP430ToolChain(const Driver &D, const llvm::Triple &Triple,
   }
 
   SmallString<128> SysRootDir(computeSysRoot());
-  llvm::sys::path::append(SysRootDir, "lib", MultilibSuf);
+  llvm::sys::path::append(SysRootDir, "msp430-elf", "lib", MultilibSuf);
   addPathIfExists(D, SysRootDir, getFilePaths());
 }
 
@@ -138,10 +138,9 @@ std::string MSP430ToolChain::computeSysRoot() const {
 
   SmallString<128> Dir;
   if (GCCInstallation.isValid())
-    llvm::sys::path::append(Dir, GCCInstallation.getParentLibPath(), "..",
-                            GCCInstallation.getTriple().str());
+    llvm::sys::path::append(Dir, GCCInstallation.getParentLibPath(), "..");
   else
-    llvm::sys::path::append(Dir, getDriver().Dir, "..", getTriple().str());
+    llvm::sys::path::append(Dir, getDriver().Dir, "..");
 
   return std::string(Dir.str());
 }
@@ -153,7 +152,7 @@ void MSP430ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
     return;
 
   SmallString<128> Dir(computeSysRoot());
-  llvm::sys::path::append(Dir, "include");
+  llvm::sys::path::append(Dir, "msp430-elf", "include");
   addSystemInclude(DriverArgs, CC1Args, Dir.str());
 }
 
@@ -180,6 +179,87 @@ Tool *MSP430ToolChain::buildLinker() const {
   return new tools::msp430::Linker(*this);
 }
 
+void msp430::Linker::AddStartFiles(bool UseExceptions, const ArgList &Args,
+                                   ArgStringList &CmdArgs) const {
+  const ToolChain &ToolChain = getToolChain();
+
+  CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
+  const char *crtbegin = UseExceptions ? "crtbegin.o" : "crtbegin_no_eh.o";
+  CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
+}
+
+void msp430::Linker::AddDefaultLibs(const llvm::opt::ArgList &Args,
+                                    llvm::opt::ArgStringList &CmdArgs) const {
+  const ToolChain &ToolChain = getToolChain();
+  const Driver &D = ToolChain.getDriver();
+
+  CmdArgs.push_back("--start-group");
+  CmdArgs.push_back(Args.MakeArgString(getHWMultLib(Args)));
+  CmdArgs.push_back("-lc");
+  AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
+  CmdArgs.push_back("-lcrt");
+
+  if (Args.hasArg(options::OPT_msim)) {
+    CmdArgs.push_back("-lsim");
+
+    // msp430-sim.ld relies on __crt0_call_exit being implicitly .refsym-ed
+    // in main() by msp430-gcc.
+    // This workaround should work seamlessly unless the compilation unit that
+    // contains main() is compiled by clang and then passed to
+    // gcc compiler driver for linkage.
+    CmdArgs.push_back("--undefined=__crt0_call_exit");
+  } else
+    CmdArgs.push_back("-lnosys");
+
+  CmdArgs.push_back("--end-group");
+  AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
+}
+
+void msp430::Linker::AddEndFiles(bool UseExceptions, const ArgList &Args,
+                                 ArgStringList &CmdArgs) const {
+  const ToolChain &ToolChain = getToolChain();
+  const Driver &D = ToolChain.getDriver();
+
+  const char *crtend = UseExceptions ? "crtend.o" : "crtend_no_eh.o";
+  CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
+  AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
+}
+
+static void AddSspArgs(const ArgList &Args, ArgStringList &CmdArgs) {
+  Arg *SspFlag = Args.getLastArg(
+      options::OPT_fno_stack_protector, options::OPT_fstack_protector,
+      options::OPT_fstack_protector_all, options::OPT_fstack_protector_strong);
+
+  if (SspFlag &&
+      !SspFlag->getOption().matches(options::OPT_fno_stack_protector)) {
+    CmdArgs.push_back("-lssp_nonshared");
+    CmdArgs.push_back("-lssp");
+  }
+}
+
+static void AddImplicitLinkerScript(const std::string SysRoot,
+                                    const ArgList &Args,
+                                    ArgStringList &CmdArgs) {
+  if (Args.hasArg(options::OPT_T))
+    return;
+
+  if (Args.hasArg(options::OPT_msim)) {
+    CmdArgs.push_back("-Tmsp430-sim.ld");
+    return;
+  }
+
+  const Arg *MCUArg = Args.getLastArg(options::OPT_mmcu_EQ);
+  if (!MCUArg)
+    return;
+
+  SmallString<128> MCULinkerScriptPath(SysRoot);
+  llvm::sys::path::append(MCULinkerScriptPath, "include");
+  // -L because .ld INCLUDEs _symbols.ld
+  CmdArgs.push_back(Args.MakeArgString("-L" + MCULinkerScriptPath));
+  CmdArgs.push_back(
+      Args.MakeArgString("-T" + StringRef(MCUArg->getValue()) + ".ld"));
+}
+
 void msp430::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
@@ -189,44 +269,49 @@ void msp430::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   const Driver &D = ToolChain.getDriver();
   std::string Linker = ToolChain.GetProgramPath(getShortName());
   ArgStringList CmdArgs;
-
-  if (!D.SysRoot.empty())
-    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
+  bool UseExceptions = Args.hasFlag(options::OPT_fexceptions,
+                                    options::OPT_fno_exceptions, false);
+  bool UseStartAndEndFiles = !Args.hasArg(options::OPT_nostdlib, options::OPT_r,
+                                          options::OPT_nostartfiles);
+
+  if (Args.hasArg(options::OPT_mrelax))
+    CmdArgs.push_back("--relax");
+  if (!Args.hasArg(options::OPT_r, options::OPT_g_Group))
+    CmdArgs.push_back("--gc-sections");
+
+  Args.AddAllArgs(CmdArgs, {
+                               options::OPT_e,
+                               options::OPT_n,
+                               options::OPT_s,
+                               options::OPT_t,
+                               options::OPT_u,
+                           });
+
+  if (UseStartAndEndFiles)
+    AddStartFiles(UseExceptions, Args, CmdArgs);
 
   Args.AddAllArgs(CmdArgs, options::OPT_L);
   ToolChain.AddFilePathLibArgs(Args, CmdArgs);
-
-  if (!Args.hasArg(options::OPT_T)) {
-    if (const Arg *MCUArg = Args.getLastArg(options::OPT_mmcu_EQ))
-      CmdArgs.push_back(
-          Args.MakeArgString("-T" + StringRef(MCUArg->getValue()) + ".ld"));
-  } else {
-    Args.AddAllArgs(CmdArgs, options::OPT_T);
-  }
-
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
-  }
-
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
 
-  CmdArgs.push_back("--start-group");
-  CmdArgs.push_back(Args.MakeArgString(getHWMultLib(Args)));
-  CmdArgs.push_back("-lgcc");
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
-    CmdArgs.push_back("-lc");
-    CmdArgs.push_back("-lcrt");
-    CmdArgs.push_back("-lnosys");
+  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r,
+                   options::OPT_nodefaultlibs)) {
+    AddSspArgs(Args, CmdArgs);
+    AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
+    if (!Args.hasArg(options::OPT_nolibc)) {
+      AddDefaultLibs(Args, CmdArgs);
+      AddImplicitLinkerScript(D.SysRoot, Args, CmdArgs);
+    }
   }
-  CmdArgs.push_back("--end-group");
 
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
-  }
+  if (UseStartAndEndFiles)
+    AddEndFiles(UseExceptions, Args, CmdArgs);
+
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
+
+  Args.AddAllArgs(CmdArgs, options::OPT_T);
+
   C.addCommand(
       std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(),
                                 Args.MakeArgString(Linker), CmdArgs, Inputs));
diff --git a/clang/lib/Driver/ToolChains/MSP430.h b/clang/lib/Driver/ToolChains/MSP430.h
index 58fd158cd12f9..3789e7442a238 100644
--- a/clang/lib/Driver/ToolChains/MSP430.h
+++ b/clang/lib/Driver/ToolChains/MSP430.h
@@ -40,6 +40,11 @@ class LLVM_LIBRARY_VISIBILITY MSP430ToolChain : public Generic_ELF {
   bool isPIEDefault() const override { return false; }
   bool isPICDefaultForced() const override { return true; }
 
+  UnwindLibType
+  GetUnwindLibType(const llvm::opt::ArgList &Args) const override {
+    return UNW_None;
+  }
+
 protected:
   Tool *buildLinker() const override;
 
@@ -61,6 +66,14 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
                     const InputInfo &Output, const InputInfoList &Inputs,
                     const llvm::opt::ArgList &TCArgs,
                     const char *LinkingOutput) const override;
+
+private:
+  void AddStartFiles(bool UseExceptions, const llvm::opt::ArgList &Args,
+                     llvm::opt::ArgStringList &CmdArgs) const;
+  void AddDefaultLibs(const llvm::opt::ArgList &Args,
+                      llvm::opt::ArgStringList &CmdArgs) const;
+  void AddEndFiles(bool UseExceptions, const llvm::opt::ArgList &Args,
+                   llvm::opt::ArgStringList &CmdArgs) const;
 };
 
 void getMSP430TargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp
index b233e210d889e..a1a1b413fb6c6 100644
--- a/clang/lib/Driver/ToolChains/MinGW.cpp
+++ b/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -398,7 +398,8 @@ llvm::ErrorOr toolchains::MinGW::findClangRelativeSysroot() {
 
 toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple,
                          const ArgList &Args)
-    : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) {
+    : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args),
+      RocmInstallation(D, Triple, Args) {
   getProgramPaths().push_back(getDriver().getInstalledDir());
 
   if (getDriver().SysRoot.size())
@@ -500,8 +501,14 @@ void toolchains::MinGW::AddCudaIncludeArgs(const ArgList &DriverArgs,
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+void toolchains::MinGW::AddHIPIncludeArgs(const ArgList &DriverArgs,
+                                          ArgStringList &CC1Args) const {
+  RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
+}
+
 void toolchains::MinGW::printVerboseInfo(raw_ostream &OS) const {
   CudaInstallation.print(OS);
+  RocmInstallation.print(OS);
 }
 
 // Include directories for various hosts:
diff --git a/clang/lib/Driver/ToolChains/MinGW.h b/clang/lib/Driver/ToolChains/MinGW.h
index 46264a55cfc7b..2f1559fcf34cd 100644
--- a/clang/lib/Driver/ToolChains/MinGW.h
+++ b/clang/lib/Driver/ToolChains/MinGW.h
@@ -11,6 +11,7 @@
 
 #include "Cuda.h"
 #include "Gnu.h"
+#include "ROCm.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 #include "llvm/Support/ErrorOr.h"
@@ -81,6 +82,8 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain {
 
   void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                           llvm::opt::ArgStringList &CC1Args) const override;
+  void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                         llvm::opt::ArgStringList &CC1Args) const override;
 
   void printVerboseInfo(raw_ostream &OS) const override;
 
@@ -91,6 +94,7 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain {
 
 private:
   CudaInstallationDetector CudaInstallation;
+  RocmInstallationDetector RocmInstallation;
 
   std::string Base;
   std::string GccLibDir;
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index a74015d3b4dc3..7f8e351265127 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2844,6 +2844,11 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
             Left.Previous &&
             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon,
                                     tok::l_square));
+  // Ensure right pointer alignment with ellipsis e.g. int *...P
+  if (Left.is(tok::ellipsis) && Left.Previous &&
+      Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp))
+    return Style.PointerAlignment != FormatStyle::PAS_Right;
+
   if (Right.is(tok::star) && Left.is(tok::l_paren))
     return false;
   if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp))
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index 0e23b92e2dea9..af5446618b036 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -8,11 +8,6 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
-set(optional_deps intrinsics_gen)
-if (CLANG_BUILT_STANDALONE)
-  set(optional_deps)
-endif()
-
 add_clang_library(clangFrontend
   ASTConsumers.cpp
   ASTMerge.cpp
@@ -49,7 +44,7 @@ add_clang_library(clangFrontend
 
   DEPENDS
   ClangDriverOptions
-  ${optional_deps}
+  intrinsics_gen
 
   LINK_LIBS
   clangAST
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 2f3a894981c99..0a04ab5c940c9 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1090,8 +1090,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
           A->getOption().getID() == options::OPT_INPUT ||
           A->getOption().getID() == options::OPT_x ||
           A->getOption().getID() == options::OPT_fembed_bitcode ||
-          (A->getOption().getGroup().isValid() &&
-           A->getOption().getGroup().getID() == options::OPT_W_Group))
+          A->getOption().matches(options::OPT_W_Group))
         continue;
       ArgStringList ASL;
       A->render(Args, ASL);
@@ -3016,6 +3015,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.GNUAsm = !Args.hasArg(OPT_fno_gnu_inline_asm);
   Opts.Cmse = Args.hasArg(OPT_mcmse); // Armv8-M Security Extensions
 
+  Opts.ArmSveVectorBits =
+      getLastArgIntValue(Args, options::OPT_msve_vector_bits_EQ, 0, Diags);
+
   // __declspec is enabled by default for the PS4 by the driver, and also
   // enabled for Microsoft Extensions or Borland Extensions, here.
   //
diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index c48c754ed1a4b..8c10ff6b461fd 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -23,20 +23,16 @@
 #define __DEVICE__ __device__ inline
 #endif
 
-// Make the algorithms available for C and C++ by selecting the right functions.
-#if defined(__cplusplus)
-// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we
-// overload all other math functions because old math system headers and not
-// always conformant and return an integer instead of a boolean. Until that has
-// been addressed we need to work around it. For now, we substituate with the
-// calls we would have used to implement those three functions. Note that we
-// could use the C alternatives as well.
-#define _ISNANd ::__isnan
-#define _ISNANf ::__isnanf
-#define _ISINFd ::__isinf
-#define _ISINFf ::__isinff
-#define _ISFINITEd ::__isfinited
-#define _ISFINITEf ::__finitef
+// To make the algorithms available for C and C++ in CUDA and OpenMP we select
+// different but equivalent function versions. TODO: For OpenMP we currently
+// select the native builtins as the overload support for templates is lacking.
+#if !defined(_OPENMP)
+#define _ISNANd std::isnan
+#define _ISNANf std::isnan
+#define _ISINFd std::isinf
+#define _ISINFf std::isinf
+#define _ISFINITEd std::isfinite
+#define _ISFINITEf std::isfinite
 #define _COPYSIGNd std::copysign
 #define _COPYSIGNf std::copysign
 #define _SCALBNd std::scalbn
@@ -46,20 +42,20 @@
 #define _LOGBd std::logb
 #define _LOGBf std::logb
 #else
-#define _ISNANd isnan
-#define _ISNANf isnanf
-#define _ISINFd isinf
-#define _ISINFf isinff
-#define _ISFINITEd isfinite
-#define _ISFINITEf isfinitef
-#define _COPYSIGNd copysign
-#define _COPYSIGNf copysignf
-#define _SCALBNd scalbn
-#define _SCALBNf scalbnf
-#define _ABSd abs
-#define _ABSf absf
-#define _LOGBd logb
-#define _LOGBf logbf
+#define _ISNANd __nv_isnand
+#define _ISNANf __nv_isnanf
+#define _ISINFd __nv_isinfd
+#define _ISINFf __nv_isinff
+#define _ISFINITEd __nv_isfinited
+#define _ISFINITEf __nv_finitef
+#define _COPYSIGNd __nv_copysign
+#define _COPYSIGNf __nv_copysignf
+#define _SCALBNd __nv_scalbn
+#define _SCALBNf __nv_scalbnf
+#define _ABSd __nv_fabs
+#define _ABSf __nv_fabsf
+#define _LOGBd __nv_logb
+#define _LOGBf __nv_logbf
 #endif
 
 #if defined(__cplusplus)
diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 2e8e6ae71d9cf..332e616702acf 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -340,16 +340,6 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
-// In C++ mode OpenMP takes the system versions of these because some math
-// headers provide the wrong return type. This cannot happen in C and we can and
-// want to use the specialized versions right away.
-#if defined(_OPENMP) && !defined(__cplusplus)
-__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); }
-__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); }
-__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); }
-#endif
-
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__DEVICE_VOID__")
 #pragma pop_macro("__FAST_OR_SLOW")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 9a4009216930a..ac5f43836316e 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -17146,6 +17146,20 @@ vec_splati_ins(vector float __a, const unsigned int __b, const float __c) {
 #endif
   return __a;
 }
+
+/* vec_test_lsbb_all_ones */
+
+static __inline__ int __ATTRS_o_ai
+vec_test_lsbb_all_ones(vector unsigned char __a) {
+  return __builtin_vsx_xvtlsbb(__a, 1);
+}
+
+/* vec_test_lsbb_all_zeros */
+
+static __inline__ int __ATTRS_o_ai
+vec_test_lsbb_all_zeros(vector unsigned char __a) {
+  return __builtin_vsx_xvtlsbb(__a, 0);
+}
 #endif /* __VSX__ */
 #endif /* __POWER10_VECTOR__ */
 
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index aa35200c33b66..b225bb7c8b36c 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -926,6 +926,15 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
     } else if (Tok.is(tok::kw_this)) {
       Kind = LCK_This;
       Loc = ConsumeToken();
+    } else if (Tok.isOneOf(tok::amp, tok::equal) &&
+               NextToken().isOneOf(tok::comma, tok::r_square) &&
+               Intro.Default == LCD_None) {
+      // We have a lone "&" or "=" which is either a misplaced capture-default
+      // or the start of a capture (in the "&" case) with the rest of the
+      // capture missing. Both are an error but a misplaced capture-default
+      // is more likely if we don't already have a capture default.
+      return Invalid(
+          [&] { Diag(Tok.getLocation(), diag::err_capture_default_first); });
     } else {
       TryConsumeToken(tok::ellipsis, EllipsisLocs[0]);
 
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index afcef30438434..5223755c8fdf1 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -1441,7 +1441,7 @@ bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc,
 /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
 ///
 ///    default-clause:
-///         'default' '(' 'none' | 'shared' ')
+///         'default' '(' 'none' | 'shared' | 'firstprivate' ')'
 ///
 ///    proc_bind-clause:
 ///         'proc_bind' '(' 'master' | 'close' | 'spread' ')
@@ -2772,7 +2772,7 @@ OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind,
 /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
 ///
 ///    default-clause:
-///         'default' '(' 'none' | 'shared' ')'
+///         'default' '(' 'none' | 'shared' | 'firstprivate' ')'
 ///
 ///    proc_bind-clause:
 ///         'proc_bind' '(' 'master' | 'close' | 'spread' ')'
@@ -2785,6 +2785,14 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind,
   llvm::Optional Val = parseOpenMPSimpleClause(*this, Kind);
   if (!Val || ParseOnly)
     return nullptr;
+  if (getLangOpts().OpenMP < 51 && Kind == OMPC_default &&
+      static_cast(Val.getValue().Type) ==
+          OMP_DEFAULT_firstprivate) {
+    Diag(Val.getValue().LOpen, diag::err_omp_invalid_dsa)
+        << getOpenMPClauseName(OMPC_firstprivate)
+        << getOpenMPClauseName(OMPC_default) << "5.1";
+    return nullptr;
+  }
   return Actions.ActOnOpenMPSimpleClause(
       Kind, Val.getValue().Type, Val.getValue().TypeLoc, Val.getValue().LOpen,
       Val.getValue().Loc, Val.getValue().RLoc);
diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp
index 948162c11b3ff..f026f3a1bfb29 100644
--- a/clang/lib/Parse/ParseTentative.cpp
+++ b/clang/lib/Parse/ParseTentative.cpp
@@ -1110,8 +1110,9 @@ class TentativeParseCCC final : public CorrectionCandidateCallback {
 public:
   TentativeParseCCC(const Token &Next) {
     WantRemainingKeywords = false;
-    WantTypeSpecifiers = Next.isOneOf(tok::l_paren, tok::r_paren, tok::greater,
-                                      tok::l_brace, tok::identifier);
+    WantTypeSpecifiers =
+        Next.isOneOf(tok::l_paren, tok::r_paren, tok::greater, tok::l_brace,
+                     tok::identifier, tok::comma);
   }
 
   bool ValidateCandidate(const TypoCorrection &Candidate) override {
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 283a04683a32a..6203edea7112f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -715,9 +715,8 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
                                       CallerKnownEmitted] {
     switch (IdentifyCUDAPreference(Caller, Callee)) {
     case CFP_Never:
-      return DeviceDiagBuilder::K_Immediate;
     case CFP_WrongSide:
-      assert(Caller && "WrongSide calls require a non-null caller");
+      assert(Caller && "Never/wrongSide calls require a non-null caller");
       // If we know the caller will be emitted, we know this wrong-side call
       // will be emitted, so it's an immediate error.  Otherwise, defer the
       // error until we know the caller is emitted.
@@ -740,9 +739,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
 
   DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
       << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
-  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
-                    Caller, *this)
-      << Callee;
+  if (!Callee->getBuiltinID())
+    DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
+                      Caller, *this)
+        << Callee;
   return DiagKind != DeviceDiagBuilder::K_Immediate &&
          DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
 }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1a3f94ba96557..b1f258fcfff17 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5485,6 +5485,15 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) {
   // gracefully.
   TheCall->setType(ResultType);
 
+  // Prohibit use of _ExtInt with atomic builtins.
+  // The arguments would have already been converted to the first argument's
+  // type, so only need to check the first argument.
+  const auto *ExtIntValType = ValType->getAs();
+  if (ExtIntValType && !llvm::isPowerOf2_64(ExtIntValType->getNumBits())) {
+    Diag(FirstArg->getExprLoc(), diag::err_atomic_builtin_ext_int_size);
+    return ExprError();
+  }
+
   return TheCallResult;
 }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 0b7339e61b0bb..03eae7bb0a740 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -6290,6 +6290,8 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
 void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) {
   if (Decl->getType().hasAddressSpace())
     return;
+  if (Decl->getType()->isDependentType())
+    return;
   if (VarDecl *Var = dyn_cast(Decl)) {
     QualType Type = Var->getType();
     if (Type->isSamplerT() || Type->isVoidType())
@@ -7865,6 +7867,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
     if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() ||
         NewVD->hasExternalStorage()) {
       if (!T->isSamplerT() &&
+          !T->isDependentType() &&
           !(T.getAddressSpace() == LangAS::opencl_constant ||
             (T.getAddressSpace() == LangAS::opencl_global &&
              (getLangOpts().OpenCLVersion == 200 ||
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 9cad6debc600c..22bf35dbd0cb0 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -1066,7 +1066,7 @@ static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T,
     TemplateArgumentListInfo &Args;
     ICEDiagnoser(LookupResult &R, TemplateArgumentListInfo &Args)
         : R(R), Args(Args) {}
-    void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) {
+    void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override {
       S.Diag(Loc, diag::err_decomp_decl_std_tuple_size_not_constant)
           << printTemplateArgs(S.Context.getPrintingPolicy(), Args);
     }
@@ -3045,7 +3045,7 @@ void Sema::CheckOverrideControl(NamedDecl *D) {
       << MD->getDeclName();
 }
 
-void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D) {
+void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D, bool Inconsistent) {
   if (D->isInvalidDecl() || D->hasAttr())
     return;
   CXXMethodDecl *MD = dyn_cast(D);
@@ -3061,12 +3061,22 @@ void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D) {
       return;
 
   if (MD->size_overridden_methods() > 0) {
-    unsigned DiagID = isa(MD)
-                          ? diag::warn_destructor_marked_not_override_overriding
-                          : diag::warn_function_marked_not_override_overriding;
-    Diag(MD->getLocation(), DiagID) << MD->getDeclName();
-    const CXXMethodDecl *OMD = *MD->begin_overridden_methods();
-    Diag(OMD->getLocation(), diag::note_overridden_virtual_function);
+    auto EmitDiag = [&](unsigned DiagInconsistent, unsigned DiagSuggest) {
+      unsigned DiagID =
+          Inconsistent && !Diags.isIgnored(DiagInconsistent, MD->getLocation())
+              ? DiagInconsistent
+              : DiagSuggest;
+      Diag(MD->getLocation(), DiagID) << MD->getDeclName();
+      const CXXMethodDecl *OMD = *MD->begin_overridden_methods();
+      Diag(OMD->getLocation(), diag::note_overridden_virtual_function);
+    };
+    if (isa(MD))
+      EmitDiag(
+          diag::warn_inconsistent_destructor_marked_not_override_overriding,
+          diag::warn_suggest_destructor_marked_not_override_overriding);
+    else
+      EmitDiag(diag::warn_inconsistent_function_marked_not_override_overriding,
+               diag::warn_suggest_function_marked_not_override_overriding);
   }
 }
 
@@ -6749,13 +6759,10 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) {
     }
   }
 
-  if (HasMethodWithOverrideControl &&
-      HasOverridingMethodWithoutOverrideControl) {
-    // At least one method has the 'override' control declared.
-    // Diagnose all other overridden methods which do not have 'override'
-    // specified on them.
+  if (HasOverridingMethodWithoutOverrideControl) {
+    bool HasInconsistentOverrideControl = HasMethodWithOverrideControl;
     for (auto *M : Record->methods())
-      DiagnoseAbsenceOfOverrideControl(M);
+      DiagnoseAbsenceOfOverrideControl(M, HasInconsistentOverrideControl);
   }
 
   // Check the defaulted secondary comparisons after any other member functions.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 550c8e0e11a03..5db96698c2a06 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -15568,7 +15568,6 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
   bool CheckInferredResultType = false;
   bool isInvalid = false;
   unsigned DiagKind = 0;
-  FixItHint Hint;
   ConversionFixItGenerator ConvHints;
   bool MayHaveConvFixit = false;
   bool MayHaveFunctionDiff = false;
@@ -15621,10 +15620,9 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
     }
     CheckInferredResultType = DstType->isObjCObjectPointerType() &&
       SrcType->isObjCObjectPointerType();
-    if (Hint.isNull() && !CheckInferredResultType) {
+    if (!CheckInferredResultType) {
       ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
-    }
-    else if (CheckInferredResultType) {
+    } else if (CheckInferredResultType) {
       SrcType = SrcType.getUnqualifiedType();
       DstType = DstType.getUnqualifiedType();
     }
@@ -15793,13 +15791,11 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
     FDiag << FirstType << SecondType << Action << SrcExpr->getSourceRange();
 
   // If we can fix the conversion, suggest the FixIts.
-  assert(ConvHints.isNull() || Hint.isNull());
   if (!ConvHints.isNull()) {
     for (FixItHint &H : ConvHints.Hints)
       FDiag << H;
-  } else {
-    FDiag << Hint;
   }
+
   if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); }
 
   if (MayHaveFunctionDiff)
@@ -19212,9 +19208,6 @@ ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
 
 ExprResult Sema::CreateRecoveryExpr(SourceLocation Begin, SourceLocation End,
                                     ArrayRef SubExprs, QualType T) {
-  // FIXME: enable it for C++, RecoveryExpr is type-dependent to suppress
-  // bogus diagnostics and this trick does not work in C.
-  // FIXME: use containsErrors() to suppress unwanted diags in C.
   if (!Context.getLangOpts().RecoveryAST)
     return ExprError();
 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b27abb54c170f..8bf605e5e76b8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -53,9 +53,10 @@ static const Expr *checkMapClauseExpressionBase(
 namespace {
 /// Default data sharing attributes, which can be applied to directive.
 enum DefaultDataSharingAttributes {
-  DSA_unspecified = 0, /// Data sharing attribute not specified.
-  DSA_none = 1 << 0,   /// Default data sharing attribute 'none'.
-  DSA_shared = 1 << 1, /// Default data sharing attribute 'shared'.
+  DSA_unspecified = 0,       /// Data sharing attribute not specified.
+  DSA_none = 1 << 0,         /// Default data sharing attribute 'none'.
+  DSA_shared = 1 << 1,       /// Default data sharing attribute 'shared'.
+  DSA_firstprivate = 1 << 2, /// Default data sharing attribute 'firstprivate'.
 };
 
 /// Stack for tracking declarations used in OpenMP directives and
@@ -684,6 +685,11 @@ class DSAStackTy {
     getTopOfStack().DefaultAttr = DSA_shared;
     getTopOfStack().DefaultAttrLoc = Loc;
   }
+  /// Set default data sharing attribute to firstprivate.
+  void setDefaultDSAFirstPrivate(SourceLocation Loc) {
+    getTopOfStack().DefaultAttr = DSA_firstprivate;
+    getTopOfStack().DefaultAttrLoc = Loc;
+  }
   /// Set default data mapping attribute to Modifier:Kind
   void setDefaultDMAAttr(OpenMPDefaultmapClauseModifier M,
                          OpenMPDefaultmapClauseKind Kind,
@@ -1183,6 +1189,15 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter,
     return DVar;
   case DSA_none:
     return DVar;
+  case DSA_firstprivate:
+    if (VD->getStorageDuration() == SD_Static &&
+        VD->getDeclContext()->isFileContext()) {
+      DVar.CKind = OMPC_unknown;
+    } else {
+      DVar.CKind = OMPC_firstprivate;
+    }
+    DVar.ImplicitDSALoc = Iter->DefaultAttrLoc;
+    return DVar;
   case DSA_unspecified:
     // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
     // in a Construct, implicitly determined, p.2]
@@ -2058,7 +2073,13 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level,
         // If the variable is artificial and must be captured by value - try to
         // capture by value.
         !(isa(D) && !D->hasAttr() &&
-          !cast(D)->getInit()->isGLValue());
+          !cast(D)->getInit()->isGLValue()) &&
+        // If the variable is implicitly firstprivate and scalar - capture by
+        // copy
+        !(DSAStack->getDefaultDSA() == DSA_firstprivate &&
+          !DSAStack->hasExplicitDSA(
+              D, [](OpenMPClauseKind K) { return K != OMPC_unknown; }, Level) &&
+          !DSAStack->isLoopControlVariable(D, Level).first);
   }
 
   // When passing data by copy, we need to make sure it fits the uintptr size
@@ -2185,10 +2206,13 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
         DSAStack->isClauseParsingMode());
     // Global shared must not be captured.
     if (VD && !VD->hasLocalStorage() && DVarPrivate.CKind == OMPC_unknown &&
-        (DSAStack->getDefaultDSA() != DSA_none || DVarTop.CKind == OMPC_shared))
+        ((DSAStack->getDefaultDSA() != DSA_none &&
+          DSAStack->getDefaultDSA() != DSA_firstprivate) ||
+         DVarTop.CKind == OMPC_shared))
       return nullptr;
     if (DVarPrivate.CKind != OMPC_unknown ||
-        (VD && DSAStack->getDefaultDSA() == DSA_none))
+        (VD && (DSAStack->getDefaultDSA() == DSA_none ||
+                DSAStack->getDefaultDSA() == DSA_firstprivate)))
       return VD ? VD : cast(DVarPrivate.PrivateCopy->getDecl());
   }
   return nullptr;
@@ -3333,10 +3357,19 @@ class DSAAttrChecker final : public StmtVisitor {
       // in the construct, and does not have a predetermined data-sharing
       // attribute, must have its data-sharing attribute explicitly determined
       // by being listed in a data-sharing attribute clause.
-      if (DVar.CKind == OMPC_unknown && Stack->getDefaultDSA() == DSA_none &&
+      if (DVar.CKind == OMPC_unknown &&
+          (Stack->getDefaultDSA() == DSA_none ||
+           Stack->getDefaultDSA() == DSA_firstprivate) &&
           isImplicitOrExplicitTaskingRegion(DKind) &&
           VarsWithInheritedDSA.count(VD) == 0) {
-        VarsWithInheritedDSA[VD] = E;
+        bool InheritedDSA = Stack->getDefaultDSA() == DSA_none;
+        if (!InheritedDSA && Stack->getDefaultDSA() == DSA_firstprivate) {
+          DSAStackTy::DSAVarData DVar =
+              Stack->getImplicitDSA(VD, /*FromParent=*/false);
+          InheritedDSA = DVar.CKind == OMPC_unknown;
+        }
+        if (InheritedDSA)
+          VarsWithInheritedDSA[VD] = E;
         return;
       }
 
@@ -3438,7 +3471,9 @@ class DSAAttrChecker final : public StmtVisitor {
 
       // Define implicit data-sharing attributes for task.
       DVar = Stack->getImplicitDSA(VD, /*FromParent=*/false);
-      if (isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared &&
+      if (((isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared) ||
+           (Stack->getDefaultDSA() == DSA_firstprivate &&
+            DVar.CKind == OMPC_firstprivate && !DVar.RefExpr)) &&
           !Stack->isLoopControlVariable(VD).first) {
         ImplicitFirstprivate.push_back(E);
         return;
@@ -5342,8 +5377,10 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
 
   ErrorFound = Res.isInvalid() || ErrorFound;
 
-  // Check variables in the clauses if default(none) was specified.
-  if (DSAStack->getDefaultDSA() == DSA_none) {
+  // Check variables in the clauses if default(none) or
+  // default(firstprivate) was specified.
+  if (DSAStack->getDefaultDSA() == DSA_none ||
+      DSAStack->getDefaultDSA() == DSA_firstprivate) {
     DSAAttrChecker DSAChecker(DSAStack, *this, nullptr);
     for (OMPClause *C : Clauses) {
       switch (C->getClauseKind()) {
@@ -5454,7 +5491,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
     if (P.getFirst()->isImplicit() || isa(P.getFirst()))
       continue;
     ErrorFound = true;
-    if (DSAStack->getDefaultDSA() == DSA_none) {
+    if (DSAStack->getDefaultDSA() == DSA_none ||
+        DSAStack->getDefaultDSA() == DSA_firstprivate) {
       Diag(P.second->getExprLoc(), diag::err_omp_no_dsa_for_variable)
           << P.first << P.second->getSourceRange();
       Diag(DSAStack->getDefaultDSALocation(), diag::note_omp_default_dsa_none);
@@ -12932,10 +12970,20 @@ OMPClause *Sema::ActOnOpenMPDefaultClause(DefaultKind Kind,
         << getOpenMPClauseName(OMPC_default);
     return nullptr;
   }
-  if (Kind == OMP_DEFAULT_none)
+
+  switch (Kind) {
+  case OMP_DEFAULT_none:
     DSAStack->setDefaultDSANone(KindKwLoc);
-  else if (Kind == OMP_DEFAULT_shared)
+    break;
+  case OMP_DEFAULT_shared:
     DSAStack->setDefaultDSAShared(KindKwLoc);
+    break;
+  case OMP_DEFAULT_firstprivate:
+    DSAStack->setDefaultDSAFirstPrivate(KindKwLoc);
+    break;
+  default:
+    llvm_unreachable("DSA unexpected in OpenMP default clause");
+  }
 
   return new (Context)
       OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc);
@@ -15105,6 +15153,7 @@ static bool actOnOMPReductionKindClause(
       auto *DRDRef = DeclareReductionRef.getAs();
       auto *DRD = cast(DRDRef->getDecl());
       if (DRD->getInitializer()) {
+        S.ActOnUninitializedDecl(PrivateVD);
         Init = DRDRef;
         RHSVD->setInit(DRDRef);
         RHSVD->setInitStyle(VarDecl::CallInit);
@@ -15211,10 +15260,19 @@ static bool actOnOMPReductionKindClause(
         llvm_unreachable("Unexpected reduction operation");
       }
     }
-    if (Init && DeclareReductionRef.isUnset())
+    if (Init && DeclareReductionRef.isUnset()) {
       S.AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false);
-    else if (!Init)
+      // Store initializer for single element in private copy. Will be used
+      // during codegen.
+      PrivateVD->setInit(RHSVD->getInit());
+      PrivateVD->setInitStyle(RHSVD->getInitStyle());
+    } else if (!Init) {
       S.ActOnUninitializedDecl(RHSVD);
+      // Store initializer for single element in private copy. Will be used
+      // during codegen.
+      PrivateVD->setInit(RHSVD->getInit());
+      PrivateVD->setInitStyle(RHSVD->getInitStyle());
+    }
     if (RHSVD->isInvalidDecl())
       continue;
     if (!RHSVD->hasInit() &&
@@ -15228,10 +15286,6 @@ static bool actOnOMPReductionKindClause(
           << D;
       continue;
     }
-    // Store initializer for single element in private copy. Will be used during
-    // codegen.
-    PrivateVD->setInit(RHSVD->getInit());
-    PrivateVD->setInitStyle(RHSVD->getInitStyle());
     DeclRefExpr *PrivateDRE = buildDeclRefExpr(S, PrivateVD, PrivateTy, ELoc);
     ExprResult ReductionOp;
     if (DeclareReductionRef.isUsable()) {
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 8f4a3f9ce58ee..33184d4e2fcf6 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -3766,25 +3766,26 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
   } else if (!RetValExp && !HasDependentReturnType) {
     FunctionDecl *FD = getCurFunctionDecl();
 
-    unsigned DiagID;
     if (getLangOpts().CPlusPlus11 && FD && FD->isConstexpr()) {
       // C++11 [stmt.return]p2
-      DiagID = diag::err_constexpr_return_missing_expr;
+      Diag(ReturnLoc, diag::err_constexpr_return_missing_expr)
+          << FD << FD->isConsteval();
       FD->setInvalidDecl();
-    } else if (getLangOpts().C99) {
-      // C99 6.8.6.4p1 (ext_ since GCC warns)
-      DiagID = diag::ext_return_missing_expr;
     } else {
+      // C99 6.8.6.4p1 (ext_ since GCC warns)
       // C90 6.6.6.4p4
-      DiagID = diag::warn_return_missing_expr;
+      unsigned DiagID = getLangOpts().C99 ? diag::ext_return_missing_expr
+                                          : diag::warn_return_missing_expr;
+      // Note that at this point one of getCurFunctionDecl() or
+      // getCurMethodDecl() must be non-null (see above).
+      assert((getCurFunctionDecl() || getCurMethodDecl()) &&
+             "Not in a FunctionDecl or ObjCMethodDecl?");
+      bool IsMethod = FD == nullptr;
+      const NamedDecl *ND =
+          IsMethod ? cast(getCurMethodDecl()) : cast(FD);
+      Diag(ReturnLoc, DiagID) << ND << IsMethod;
     }
 
-    if (FD)
-      Diag(ReturnLoc, DiagID)
-          << FD->getIdentifier() << 0 /*fn*/ << FD->isConsteval();
-    else
-      Diag(ReturnLoc, DiagID) << getCurMethodDecl()->getDeclName() << 1/*meth*/;
-
     Result = ReturnStmt::Create(Context, ReturnLoc, /* RetExpr=*/nullptr,
                                 /* NRVOCandidate=*/nullptr);
   } else {
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index f3641afbbf8a0..5392be57a3aa2 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -355,7 +355,7 @@ checkDeducedTemplateArguments(ASTContext &Context,
       TemplateArgument Merged = checkDeducedTemplateArguments(
           Context, DeducedTemplateArgument(*XA, X.wasDeducedFromArrayBound()),
           DeducedTemplateArgument(*YA, Y.wasDeducedFromArrayBound()));
-      if (Merged.isNull())
+      if (Merged.isNull() && !(XA->isNull() && YA->isNull()))
         return DeducedTemplateArgument();
       NewPack.push_back(Merged);
     }
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 6df0e0e38cae9..c25de66dfc1c3 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3721,6 +3721,9 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
   if (InsertPos)
     VarTemplate->AddSpecialization(Var, InsertPos);
 
+  if (SemaRef.getLangOpts().OpenCL)
+    SemaRef.deduceOpenCLAddressSpace(Var);
+
   // Substitute the nested name specifier, if any.
   if (SubstQualifier(D, Var))
     return nullptr;
@@ -4991,6 +4994,9 @@ VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl(
   // Instantiate the initializer.
   InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs);
 
+  if (getLangOpts().OpenCL)
+    deduceOpenCLAddressSpace(VarSpec);
+
   return VarSpec;
 }
 
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 574566df9f421..7fdebc5045eca 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7769,6 +7769,19 @@ static bool isPermittedNeonBaseType(QualType &Ty,
          BTy->getKind() == BuiltinType::BFloat16;
 }
 
+static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr,
+                                           llvm::APSInt &Result) {
+  const auto *AttrExpr = Attr.getArgAsExpr(0);
+  if (AttrExpr->isTypeDependent() || AttrExpr->isValueDependent() ||
+      !AttrExpr->isIntegerConstantExpr(Result, S.Context)) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
+        << Attr << AANT_ArgumentIntegerConstant << AttrExpr->getSourceRange();
+    Attr.setInvalid();
+    return false;
+  }
+  return true;
+}
+
 /// HandleNeonVectorTypeAttr - The "neon_vector_type" and
 /// "neon_polyvector_type" attributes are used to create vector types that
 /// are mangled according to ARM's ABI.  Otherwise, these types are identical
@@ -7794,16 +7807,10 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
     return;
   }
   // The number of elements must be an ICE.
-  Expr *numEltsExpr = static_cast(Attr.getArgAsExpr(0));
   llvm::APSInt numEltsInt(32);
-  if (numEltsExpr->isTypeDependent() || numEltsExpr->isValueDependent() ||
-      !numEltsExpr->isIntegerConstantExpr(numEltsInt, S.Context)) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
-        << Attr << AANT_ArgumentIntegerConstant
-        << numEltsExpr->getSourceRange();
-    Attr.setInvalid();
+  if (!verifyValidIntegerConstantExpr(S, Attr, numEltsInt))
     return;
-  }
+
   // Only certain element types are supported for Neon vectors.
   if (!isPermittedNeonBaseType(CurType, VecKind, S)) {
     S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
@@ -7824,6 +7831,58 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
   CurType = S.Context.getVectorType(CurType, numElts, VecKind);
 }
 
+/// HandleArmSveVectorBitsTypeAttr - The "arm_sve_vector_bits" attribute is
+/// used to create fixed-length versions of sizeless SVE types defined by
+/// the ACLE, such as svint32_t and svbool_t.
+static void HandleArmSveVectorBitsTypeAttr(QualType &CurType,
+                                           const ParsedAttr &Attr, Sema &S) {
+  // Target must have SVE.
+  if (!S.Context.getTargetInfo().hasFeature("sve")) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Attribute is unsupported if '-msve-vector-bits=' isn't specified.
+  if (!S.getLangOpts().ArmSveVectorBits) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_arm_feature_sve_bits_unsupported)
+        << Attr;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Check the attribute arguments.
+  if (Attr.getNumArgs() != 1) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
+        << Attr << 1;
+    Attr.setInvalid();
+    return;
+  }
+
+  // The vector size must be an integer constant expression.
+  llvm::APSInt SveVectorSizeInBits(32);
+  if (!verifyValidIntegerConstantExpr(S, Attr, SveVectorSizeInBits))
+    return;
+
+  unsigned VecSize = static_cast(SveVectorSizeInBits.getZExtValue());
+
+  // The attribute vector size must match -msve-vector-bits.
+  if (VecSize != S.getLangOpts().ArmSveVectorBits) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_bad_sve_vector_size)
+        << VecSize << S.getLangOpts().ArmSveVectorBits;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Attribute can only be attached to a single SVE vector or predicate type.
+  if (!CurType->isVLSTBuiltinType()) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_invalid_sve_type)
+        << Attr << CurType;
+    Attr.setInvalid();
+    return;
+  }
+}
+
 static void HandleArmMveStrictPolymorphismAttr(TypeProcessingState &State,
                                                QualType &CurType,
                                                ParsedAttr &Attr) {
@@ -8092,6 +8151,10 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
                                VectorType::NeonPolyVector);
       attr.setUsedAsTypeAttr();
       break;
+    case ParsedAttr::AT_ArmSveVectorBits:
+      HandleArmSveVectorBitsTypeAttr(type, attr, state.getSema());
+      attr.setUsedAsTypeAttr();
+      break;
     case ParsedAttr::AT_ArmMveStrictPolymorphism: {
       HandleArmMveStrictPolymorphismAttr(state, type, attr);
       attr.setUsedAsTypeAttr();
diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
index fd8cbd694b240..632de9e5dc832 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
@@ -272,6 +272,8 @@ void IteratorModeling::checkPostStmt(const BinaryOperator *BO,
     handleComparison(C, BO, Result, LVal, RVal,
                      BinaryOperator::getOverloadedOperator(OK));
   } else if (isRandomIncrOrDecrOperator(OK)) {
+    if (!BO->getRHS()->getType()->isIntegralOrEnumerationType())
+      return;
     handlePtrIncrOrDecr(C, BO->getLHS(),
                         BinaryOperator::getOverloadedOperator(OK), RVal);
   }
@@ -461,6 +463,12 @@ void IteratorModeling::handleComparison(CheckerContext &C, const Expr *CE,
     RPos = getIteratorPosition(State, RVal);
   }
 
+  // If the value for which we just tried to set a new iterator position is
+  // an `SVal` for which no iterator position can be set, then the setting was
+  // unsuccessful. We cannot handle the comparison in this case.
+  if (!LPos || !RPos)
+    return;
+
   // We cannot make assumptions on `UnknownVal`. Let us conjure a symbol
   // instead.
   if (RetVal.isUnknown()) {
@@ -599,6 +607,9 @@ void IteratorModeling::handlePtrIncrOrDecr(CheckerContext &C,
                                            const Expr *Iterator,
                                            OverloadedOperatorKind OK,
                                            SVal Offset) const {
+  if (!Offset.getAs())
+    return;
+
   QualType PtrType = Iterator->getType();
   if (!PtrType->isPointerType())
     return;
@@ -612,13 +623,11 @@ void IteratorModeling::handlePtrIncrOrDecr(CheckerContext &C,
     return;
 
   SVal NewVal;
-  if (OK == OO_Plus || OK == OO_PlusEqual)
+  if (OK == OO_Plus || OK == OO_PlusEqual) {
     NewVal = State->getLValue(ElementType, Offset, OldVal);
-  else {
-    const llvm::APSInt &OffsetInt =
-      Offset.castAs().getValue();
-    auto &BVF = C.getSymbolManager().getBasicVals();
-    SVal NegatedOffset = nonloc::ConcreteInt(BVF.getValue(-OffsetInt));
+  } else {
+    auto &SVB = C.getSValBuilder();
+    SVal NegatedOffset = SVB.evalMinus(Offset.castAs());
     NewVal = State->getLValue(ElementType, NegatedOffset, OldVal);
   }
 
@@ -684,9 +693,14 @@ bool IteratorModeling::noChangeInAdvance(CheckerContext &C, SVal Iter,
 
   const auto StateBefore = N->getState();
   const auto *PosBefore = getIteratorPosition(StateBefore, Iter);
-
-  assert(PosBefore && "`std::advance() should not create new iterator "
-         "position but change existing ones");
+  // FIXME: `std::advance()` should not create a new iterator position but
+  //        change existing ones. However, in case of iterators implemented as
+  //        pointers the handling of parameters in `std::advance()`-like
+  //        functions is still incomplete which may result in cases where
+  //        the new position is assigned to the wrong pointer. This causes
+  //        crash if we use an assertion here.
+  if (!PosBefore)
+    return false;
 
   return PosBefore->getOffset() == PosAfter->getOffset();
 }
diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
index df8e379d1f20e..dd014648eb6fd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
@@ -169,6 +169,8 @@ void IteratorRangeChecker::checkPreStmt(const BinaryOperator *BO,
     verifyDereference(C, LVal);
   } else if (isRandomIncrOrDecrOperator(OK)) {
     SVal RVal = State->getSVal(BO->getRHS(), C.getLocationContext());
+    if (!BO->getRHS()->getType()->isIntegralOrEnumerationType())
+      return;
     verifyRandomIncrOrDecr(C, BinaryOperator::getOverloadedOperator(OK), LVal,
                            RVal);
   }
diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
index 43f7dcd14b014..87477e96d2d16 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
@@ -66,7 +66,7 @@ class MacOSKeychainAPIChecker : public Checker,
   ProgramStateRef evalAssume(ProgramStateRef state, SVal Cond,
                              bool Assumption) const;
   void printState(raw_ostream &Out, ProgramStateRef State,
-                  const char *NL, const char *Sep) const;
+                  const char *NL, const char *Sep) const override;
 
 private:
   typedef std::pair AllocationPair;
diff --git a/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp
index abeca596d056b..df01cc760e7e9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp
@@ -57,7 +57,7 @@ class Callback : public MatchFinder::MatchCallback {
   Callback(const NumberObjectConversionChecker *C,
            BugReporter &BR, AnalysisDeclContext *ADC)
       : C(C), BR(BR), ADC(ADC) {}
-  virtual void run(const MatchFinder::MatchResult &Result);
+  void run(const MatchFinder::MatchResult &Result) override;
 };
 } // end of anonymous namespace
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index ff19d7a2b9d4c..8c2008a7ceb44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -58,7 +58,7 @@ class ObjCContainersChecker : public Checker< check::PreStmt,
                                      PointerEscapeKind Kind) const;
 
   void printState(raw_ostream &OS, ProgramStateRef State,
-                  const char *NL, const char *Sep) const;
+                  const char *NL, const char *Sep) const override;
 };
 } // end anonymous namespace
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h
index 89b8965e4c9ad..ec43a23e30a9e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h
+++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h
@@ -20,13 +20,6 @@ namespace clang {
 namespace ento {
 namespace smartptr {
 
-/// Set of STL smart pointer class which we are trying to model.
-const llvm::StringSet<> StdSmartPtrs = {
-    "shared_ptr",
-    "unique_ptr",
-    "weak_ptr",
-};
-
 /// Returns true if the event call is on smart pointer.
 bool isStdSmartPtrCall(const CallEvent &Call);
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp
index 91f2890788141..bcc7d4103c1c6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp
@@ -73,7 +73,8 @@ bool isStdSmartPtrCall(const CallEvent &Call) {
     return false;
 
   if (RecordDecl->getDeclName().isIdentifier()) {
-    return smartptr::StdSmartPtrs.count(RecordDecl->getName().lower());
+    StringRef Name = RecordDecl->getName();
+    return Name == "shared_ptr" || Name == "unique_ptr" || Name == "weak_ptr";
   }
   return false;
 }
diff --git a/clang/lib/Tooling/FileMatchTrie.cpp b/clang/lib/Tooling/FileMatchTrie.cpp
index 88dea6bb6c9f9..3b02405da2f28 100644
--- a/clang/lib/Tooling/FileMatchTrie.cpp
+++ b/clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@ class FileMatchTrieNode {
                            StringRef FileName,
                            bool &IsAmbiguous,
                            unsigned ConsumedLength = 0) const {
+    // Note: we support only directory symlinks for performance reasons.
     if (Children.empty()) {
-      if (Comparator.equivalent(StringRef(Path), FileName))
+      // As far as we do not support file symlinks, compare
+      // basenames here to avoid request to file system.
+      if (llvm::sys::path::filename(Path) ==
+              llvm::sys::path::filename(FileName) &&
+          Comparator.equivalent(StringRef(Path), FileName))
         return StringRef(Path);
       return {};
     }
@@ -121,6 +126,13 @@ class FileMatchTrieNode {
       if (!Result.empty() || IsAmbiguous)
         return Result;
     }
+
+    // If `ConsumedLength` is zero, this is the root and we have no filename
+    // match. Give up in this case, we don't try to find symlinks with
+    // different names.
+    if (ConsumedLength == 0)
+      return {};
+
     std::vector AllChildren;
     getAll(AllChildren, MatchingChild);
     StringRef Result;
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index 6d13f1ace83ba..1f192180ec451 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -750,6 +750,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor {
         return new (allocator()) syntax::FloatUserDefinedLiteralExpression;
       }
     }
+    llvm_unreachable("Unknown literal operator kind.");
   }
 
   bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) {
diff --git a/clang/test/.clang-format b/clang/test/.clang-format
index 4799b66f3e9a6..a6176c2e00131 100644
--- a/clang/test/.clang-format
+++ b/clang/test/.clang-format
@@ -1,2 +1,5 @@
 BasedOnStyle: LLVM
 ColumnLimit: 0
+AlignTrailingComments: false
+CommentPragmas: "(^ ?CHECK|^ ?expected-)"
+AlwaysBreakTemplateDeclarations: No
diff --git a/clang/test/AST/ast-printer-lambda.cpp b/clang/test/AST/ast-printer-lambda.cpp
index 27a361da5cb18..08f1ff555b0b1 100644
--- a/clang/test/AST/ast-printer-lambda.cpp
+++ b/clang/test/AST/ast-printer-lambda.cpp
@@ -15,6 +15,18 @@ void test1(int i, T... t) {
   auto lambda = [&]{};
   //CHECK: [&] {
 }
+{
+  auto lambda = [k{i}] {};
+  //CHECK: [k{i}] {
+}
+{
+  auto lambda = [k(i)] {};
+  //CHECK: [k(i)] {
+}
+{
+  auto lambda = [k = i] {};
+  //CHECK: [k = i] {
+}
 {
   auto lambda = [t..., i]{};
   //CHECK: [t..., i] {
@@ -31,6 +43,14 @@ void test1(int i, T... t) {
   auto lambda = [t..., this]{};
   //CHECK: [t..., this] {
 }
+{
+  auto lambda = [k(t...)] {};
+  //CHECK: [k(t...)] {
+}
+{
+  auto lambda = [k{t...}] {};
+  //CHECK: [k{t...}] {
+}
 }
 
 };
\ No newline at end of file
diff --git a/clang/test/AST/regression-new-expr-crash.cpp b/clang/test/AST/regression-new-expr-crash.cpp
index 81dd193b93e88..e1d93f92a125b 100644
--- a/clang/test/AST/regression-new-expr-crash.cpp
+++ b/clang/test/AST/regression-new-expr-crash.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s
 
 struct Bar {int a;};
 const Bar arr[2] = {{1}};
@@ -11,3 +11,8 @@ void foo(int a) {
   Foo *foo_array;
   foo_array = new Foo[arr[0].a];
 }
+
+void Test(int N) {
+  int arr[N];
+  decltype([&arr]{}) *p; // expected-error {{lambda expression in an unevaluated operand}}
+}
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-cxx.h b/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
index fe4b9d081e9c8..1dee3294d7323 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
@@ -60,6 +60,11 @@ template  struct __vector_iterator {
   __vector_iterator operator+(difference_type n) {
     return ptr + n;
   }
+  friend __vector_iterator operator+(
+      difference_type n,
+      const __vector_iterator &iter) {
+    return n + iter.ptr;
+  }
   __vector_iterator operator-(difference_type n) {
     return ptr - n;
   }
@@ -118,6 +123,11 @@ template  struct __deque_iterator {
   __deque_iterator operator+(difference_type n) {
     return ptr + n;
   }
+  friend __deque_iterator operator+(
+      difference_type n,
+      const __deque_iterator &iter) {
+    return n + iter.ptr;
+  }
   __deque_iterator operator-(difference_type n) {
     return ptr - n;
   }
diff --git a/clang/test/Analysis/ctu-on-demand-parsing.c b/clang/test/Analysis/ctu-on-demand-parsing.c
index 5adce7f369639..07a72a1046467 100644
--- a/clang/test/Analysis/ctu-on-demand-parsing.c
+++ b/clang/test/Analysis/ctu-on-demand-parsing.c
@@ -19,7 +19,7 @@
 // RUN:   -verify ctu-on-demand-parsing.c
 //
 // FIXME: Path handling should work on all platforms.
-// REQUIRES: linux
+// REQUIRES: system-linux
 
 void clang_analyzer_eval(int);
 
diff --git a/clang/test/Analysis/ctu-on-demand-parsing.cpp b/clang/test/Analysis/ctu-on-demand-parsing.cpp
index 058269662fb3a..e4e998c8f64c3 100644
--- a/clang/test/Analysis/ctu-on-demand-parsing.cpp
+++ b/clang/test/Analysis/ctu-on-demand-parsing.cpp
@@ -30,7 +30,7 @@
 // CHECK: CTU loaded AST file: {{.*}}ctu-chain.cpp
 //
 // FIXME: Path handling should work on all platforms.
-// REQUIRES: linux
+// REQUIRES: system-linux
 
 #include "ctu-hdr.h"
 
diff --git a/clang/test/Analysis/diagnostics/explicit-suppression.cpp b/clang/test/Analysis/diagnostics/explicit-suppression.cpp
index 2b586add19eee..0ef01771e58bc 100644
--- a/clang/test/Analysis/diagnostics/explicit-suppression.cpp
+++ b/clang/test/Analysis/diagnostics/explicit-suppression.cpp
@@ -19,6 +19,6 @@ class C {
 void testCopyNull(C *I, C *E) {
   std::copy(I, E, (C *)0);
 #ifndef SUPPRESSED
-  // expected-warning@../Inputs/system-header-simulator-cxx.h:699 {{Called C++ object pointer is null}}
+  // expected-warning@../Inputs/system-header-simulator-cxx.h:709 {{Called C++ object pointer is null}}
 #endif
 }
diff --git a/clang/test/Analysis/iterator-modeling.cpp b/clang/test/Analysis/iterator-modeling.cpp
index f19848b8dc935..0b76b0bfa7232 100644
--- a/clang/test/Analysis/iterator-modeling.cpp
+++ b/clang/test/Analysis/iterator-modeling.cpp
@@ -1948,6 +1948,13 @@ void minus_equal_ptr_iterator(const cont_with_ptr_iterator &c) {
   clang_analyzer_express(clang_analyzer_iterator_position(i)); // expected-warning{{$c.end() - 2}}
 }
 
+void minus_equal_ptr_iterator_variable(const cont_with_ptr_iterator &c,
+                                       int n) {
+  auto i = c.end();
+
+  i -= n; // no-crash
+}
+
 void plus_ptr_iterator(const cont_with_ptr_iterator &c) {
   auto i1 = c.begin();
 
@@ -1972,6 +1979,17 @@ void minus_ptr_iterator(const cont_with_ptr_iterator &c) {
   clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning{{$c.end() - 2}}
 }
 
+void ptr_iter_diff(cont_with_ptr_iterator &c) {
+  auto i0 = c.begin(), i1 = c.end();
+  ptrdiff_t len = i1 - i0; // no-crash
+}
+
+void ptr_iter_cmp_nullptr(cont_with_ptr_iterator &c) {
+  auto i0 = c.begin();
+  if (i0 != nullptr) // no-crash
+    ++i0;
+}
+
 void clang_analyzer_printState();
 
 void print_state(std::vector &V) {
diff --git a/clang/test/Analysis/iterator-range.cpp b/clang/test/Analysis/iterator-range.cpp
index 657ae89998e81..8d71039290470 100644
--- a/clang/test/Analysis/iterator-range.cpp
+++ b/clang/test/Analysis/iterator-range.cpp
@@ -935,3 +935,7 @@ void postfix_minus_assign_2_begin_ptr_iterator(
           // expected-note@-1{{Iterator decremented ahead of its valid range}}
 }
 
+void ptr_iter_diff(cont_with_ptr_iterator &c) {
+  auto i0 = c.begin(), i1 = c.end();
+  ptrdiff_t len = i1 - i0; // no-crash
+}
diff --git a/clang/test/Analysis/malloc.c b/clang/test/Analysis/malloc.c
index 714c73c3c793e..a26b511967811 100644
--- a/clang/test/Analysis/malloc.c
+++ b/clang/test/Analysis/malloc.c
@@ -791,7 +791,8 @@ void mallocEscapeMalloc() {
 void mallocMalloc() {
   int *p = malloc(12);
   p = malloc(12);
-} // expected-warning {{Potential leak of memory pointed to by}}
+} // expected-warning {{Potential leak of memory pointed to by}}\
+  // expected-warning {{Potential leak of memory pointed to by}}
 
 void mallocFreeMalloc() {
   int *p = malloc(12);
diff --git a/clang/test/Analysis/pr22954.c b/clang/test/Analysis/pr22954.c
index e88acdc29d390..093f6311a5057 100644
--- a/clang/test/Analysis/pr22954.c
+++ b/clang/test/Analysis/pr22954.c
@@ -352,6 +352,8 @@ int f19(int i) {
   memcpy(J0.s1[i].s1, input, 2);
   clang_analyzer_eval(J0.s1[0].s1[0] == 1); // expected-warning{{UNKNOWN}}\
   expected-warning{{Potential leak of memory pointed to by field 's2'}}\
+  expected-warning{{Potential leak of memory pointed to by field 's2'}}\
+  expected-warning{{Potential leak of memory pointed to by field 's2'}}\
   expected-warning{{Potential leak of memory pointed to by 'J0.s2'}}
   clang_analyzer_eval(J0.s1[0].s1[1] == 2); // expected-warning{{UNKNOWN}}
   clang_analyzer_eval(J0.s1[1].s1[0] == 3); // expected-warning{{UNKNOWN}}
diff --git a/clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c b/clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c
new file mode 100644
index 0000000000000..438af79c90bbd
--- /dev/null
+++ b/clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c
@@ -0,0 +1,8 @@
+int test(int x) {
+  if (x) {
+    int *p = 0;
+    return *p; // Null dereference.
+  } else {
+    return 1 / x; // Division by zero.
+  }
+}
diff --git a/clang/test/Analysis/scan-build/silence-core-checkers.test b/clang/test/Analysis/scan-build/silence-core-checkers.test
new file mode 100644
index 0000000000000..6d9a3017fcd61
--- /dev/null
+++ b/clang/test/Analysis/scan-build/silence-core-checkers.test
@@ -0,0 +1,30 @@
+// FIXME: Actually, "perl".
+REQUIRES: shell
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:   %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \
+RUN:   | FileCheck %s -check-prefix CHECK-TWO-BUGS
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:   -disable-checker core.DivideZero \
+RUN:   %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \
+RUN:   | FileCheck %s -check-prefix CHECK-ONE-BUG
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:   -disable-checker core.NullDereference \
+RUN:   %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \
+RUN:   | FileCheck %s -check-prefix CHECK-ONE-BUG
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:   -disable-checker core.NullDereference \
+RUN:   -disable-checker core.DivideZero \
+RUN:   %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \
+RUN:   | FileCheck %s -check-prefix CHECK-NO-BUGS
+
+CHECK-NO-BUGS: scan-build: No bugs found.
+CHECK-ONE-BUG: scan-build: 1 bug found.
+CHECK-TWO-BUGS: scan-build: 2 bugs found.
diff --git a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp
index 59cac367dbf23..0d4d34ac0e147 100644
--- a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp
+++ b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp
@@ -7,6 +7,12 @@ void a2 [[noreturn]] () {
   return; // expected-warning {{function 'a2' declared 'noreturn' should not return}}
 }
 
+template  void a3 [[noreturn]] () {}
+template <> void a3 () { return; } // expected-warning {{function 'a3' declared 'noreturn' should not return}}
+
+template  void a4 [[noreturn]] () { return; } // expected-warning 2{{function 'a4' declared 'noreturn' should not return}}
+void a4_test() { a4(); } // expected-note {{in instantiation of function template specialization 'a4' requested here}}
+
 [[noreturn, noreturn]] void b() { throw 0; } // expected-error {{attribute 'noreturn' cannot appear multiple times in an attribute specifier}}
 [[noreturn]] [[noreturn]] void b2() { throw 0; } // ok
 
diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp
index af2e7cf09ceb4..52986faa4e859 100644
--- a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp
+++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify %s
 
 void h() {
   int i1 = 0;
@@ -16,4 +16,16 @@ void h() {
   const int i4 = 0;
   extern void h4(int x = sizeof(i4));         // ok, not odr-use
   extern void h5(int x = decltype(i4 + 4)()); // ok, not odr-use
+
+  union {
+    int i5;
+  };
+
+  extern void h6(int = i5);
+  // expected-error@-1 {{default argument references local variable '' of enclosing function}}
+
+  struct S { int i; };
+  auto [x] = S();
+
+  extern void h7(int = x); // FIXME: reject
 }
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/dup.c b/clang/test/CodeGen/arm-mve-intrinsics/dup.c
index b443917cb2582..283c082570056 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/dup.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/dup.c
@@ -242,8 +242,7 @@ uint32x4_t test_vdupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> undef
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NEXT:    ret <8 x half> [[DOTSPLAT]]
 //
 float16x8_t test_vdupq_x_n_f16(float16_t a, mve_pred16_t p)
 {
@@ -256,8 +255,7 @@ float16x8_t test_vdupq_x_n_f16(float16_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> undef
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NEXT:    ret <4 x float> [[DOTSPLAT]]
 //
 float32x4_t test_vdupq_x_n_f32(float32_t a, mve_pred16_t p)
 {
@@ -270,8 +268,7 @@ float32x4_t test_vdupq_x_n_f32(float32_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+// CHECK-NEXT:    ret <16 x i8> [[DOTSPLAT]]
 //
 int8x16_t test_vdupq_x_n_s8(int8_t a, mve_pred16_t p)
 {
@@ -284,8 +281,7 @@ int8x16_t test_vdupq_x_n_s8(int8_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+// CHECK-NEXT:    ret <8 x i16> [[DOTSPLAT]]
 //
 int16x8_t test_vdupq_x_n_s16(int16_t a, mve_pred16_t p)
 {
@@ -298,8 +294,7 @@ int16x8_t test_vdupq_x_n_s16(int16_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:    ret <4 x i32> [[DOTSPLAT]]
 //
 int32x4_t test_vdupq_x_n_s32(int32_t a, mve_pred16_t p)
 {
@@ -312,8 +307,7 @@ int32x4_t test_vdupq_x_n_s32(int32_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+// CHECK-NEXT:    ret <16 x i8> [[DOTSPLAT]]
 //
 uint8x16_t test_vdupq_x_n_u8(uint8_t a, mve_pred16_t p)
 {
@@ -326,8 +320,7 @@ uint8x16_t test_vdupq_x_n_u8(uint8_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+// CHECK-NEXT:    ret <8 x i16> [[DOTSPLAT]]
 //
 uint16x8_t test_vdupq_x_n_u16(uint16_t a, mve_pred16_t p)
 {
@@ -340,8 +333,7 @@ uint16x8_t test_vdupq_x_n_u16(uint16_t a, mve_pred16_t p)
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:    ret <4 x i32> [[DOTSPLAT]]
 //
 uint32x4_t test_vdupq_x_n_u32(uint32_t a, mve_pred16_t p)
 {
diff --git a/clang/test/CodeGen/builtins-arm.c b/clang/test/CodeGen/builtins-arm.c
index f3c4ecaeee903..98e4621971b71 100644
--- a/clang/test/CodeGen/builtins-arm.c
+++ b/clang/test/CodeGen/builtins-arm.c
@@ -222,19 +222,19 @@ uint64_t mrrc2() {
 }
 
 unsigned rsr() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i32 @llvm.read_register.i32(metadata ![[M0:.*]])
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i32 @llvm.read_volatile_register.i32(metadata ![[M0:.*]])
   // CHECK-NEXT: ret i32 [[V0]]
   return __builtin_arm_rsr("cp1:2:c3:c4:5");
 }
 
 unsigned long long rsr64() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_register.i64(metadata ![[M1:.*]])
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_volatile_register.i64(metadata ![[M1:.*]])
   // CHECK-NEXT: ret i64 [[V0]]
   return __builtin_arm_rsr64("cp1:2:c3");
 }
 
 void *rsrp() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i32 @llvm.read_register.i32(metadata ![[M2:.*]])
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i32 @llvm.read_volatile_register.i32(metadata ![[M2:.*]])
   // CHECK-NEXT: [[V1:[%A-Za-z0-9.]+]] = inttoptr i32 [[V0]] to i8*
   // CHECK-NEXT: ret i8* [[V1]]
   return __builtin_arm_rsrp("sysreg");
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index f5cf997e52266..35dbb09ea7ffe 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -68,7 +68,7 @@ int32_t jcvt(double v) {
 __typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void);
 
 uint32_t rsr() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_volatile_register.i64(metadata ![[M0:[0-9]]])
   // CHECK-NEXT: trunc i64 [[V0]] to i32
   return __builtin_arm_rsr("1:2:3:4:5");
 }
@@ -76,12 +76,12 @@ uint32_t rsr() {
 __typeof__(__builtin_arm_rsr64("1:2:3:4:5")) rsr64(void);
 
 uint64_t rsr64(void) {
-  // CHECK: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
+  // CHECK: call i64 @llvm.read_volatile_register.i64(metadata ![[M0:[0-9]]])
   return __builtin_arm_rsr64("1:2:3:4:5");
 }
 
 void *rsrp() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = call i64 @llvm.read_volatile_register.i64(metadata ![[M0:[0-9]]])
   // CHECK-NEXT: inttoptr i64 [[V0]] to i8*
   return __builtin_arm_rsrp("1:2:3:4:5");
 }
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 22b4e7a6f3ecf..4e804fbafb301 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -402,25 +402,25 @@ vector double test_vec_blend_d(void) {
 }
 
 vector unsigned char test_vec_insertl_uc(void) {
-  // CHECK-BE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-BE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <16 x i8>
-  // CHECK-LE: @llvm.ppc.altivec.vinsbrx(<16 x i8> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-LE: @llvm.ppc.altivec.vinsbrx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_insertl(uca, vuca, uia);
 }
 
 vector unsigned short test_vec_insertl_us(void) {
-  // CHECK-BE: @llvm.ppc.altivec.vinshlx(<8 x i16> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-BE: @llvm.ppc.altivec.vinshlx(<8 x i16> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <8 x i16>
-  // CHECK-LE: @llvm.ppc.altivec.vinshrx(<8 x i16> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-LE: @llvm.ppc.altivec.vinshrx(<8 x i16> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-LE-NEXT: ret <8 x i16>
   return vec_insertl(usa, vusa, uia);
 }
 
 vector unsigned int test_vec_insertl_ui(void) {
-  // CHECK-BE: @llvm.ppc.altivec.vinswlx(<4 x i32> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-BE: @llvm.ppc.altivec.vinswlx(<4 x i32> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <4 x i32>
-  // CHECK-LE: @llvm.ppc.altivec.vinswrx(<4 x i32> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-LE: @llvm.ppc.altivec.vinswrx(<4 x i32> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-LE-NEXT: ret <4 x i32>
   return vec_insertl(uib, vuia, uia);
 }
@@ -458,25 +458,25 @@ vector unsigned int test_vec_insertl_uiv(void) {
 }
 
 vector unsigned char test_vec_inserth_uc(void) {
-  // CHECK-BE: @llvm.ppc.altivec.vinsbrx(<16 x i8> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-BE: @llvm.ppc.altivec.vinsbrx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <16 x i8>
-  // CHECK-LE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-LE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_inserth(uca, vuca, uia);
 }
 
 vector unsigned short test_vec_inserth_us(void) {
-  // CHECK-BE: @llvm.ppc.altivec.vinshrx(<8 x i16> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-BE: @llvm.ppc.altivec.vinshrx(<8 x i16> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <8 x i16>
-  // CHECK-LE: @llvm.ppc.altivec.vinshlx(<8 x i16> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-LE: @llvm.ppc.altivec.vinshlx(<8 x i16> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-LE-NEXT: ret <8 x i16>
   return vec_inserth(usa, vusa, uia);
 }
 
 vector unsigned int test_vec_inserth_ui(void) {
-  // CHECK-BE: @llvm.ppc.altivec.vinswrx(<4 x i32> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-BE: @llvm.ppc.altivec.vinswrx(<4 x i32> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-BE-NEXT: ret <4 x i32>
-  // CHECK-LE: @llvm.ppc.altivec.vinswlx(<4 x i32> %{{.+}}, i64 %{{.+}}, i64
+  // CHECK-LE: @llvm.ppc.altivec.vinswlx(<4 x i32> %{{.+}}, i32 %{{.+}}, i32
   // CHECK-LE-NEXT: ret <4 x i32>
   return vec_inserth(uib, vuia, uia);
 }
@@ -581,3 +581,15 @@ vector float test_vec_vec_splati_ins_f(void) {
   // CHECK: ret <4 x float>
   return vec_splati_ins(vfa, 0, 1.0f);
 }
+
+int test_vec_test_lsbb_all_ones(void) {
+  // CHECK: @llvm.ppc.vsx.xvtlsbb(<16 x i8> %{{.+}}, i1 true
+  // CHECK-NEXT: ret i32
+  return vec_test_lsbb_all_ones(vuca);
+}
+
+int test_vec_test_lsbb_all_zeros(void) {
+  // CHECK: @llvm.ppc.vsx.xvtlsbb(<16 x i8> %{{.+}}, i1 false
+  // CHECK-NEXT: ret i32
+  return vec_test_lsbb_all_zeros(vuca);
+}
diff --git a/clang/test/CodeGen/code-coverage.c b/clang/test/CodeGen/code-coverage.c
index 34ba3554f5b54..5a663135e2f03 100644
--- a/clang/test/CodeGen/code-coverage.c
+++ b/clang/test/CodeGen/code-coverage.c
@@ -51,7 +51,6 @@ int test2(int b) {
 // Check that the noredzone flag is set on the generated functions.
 
 // CHECK: void @__llvm_gcov_writeout() unnamed_addr [[NRZ:#[0-9]+]]
-// CHECK: void @__llvm_gcov_flush() unnamed_addr [[NRZ]]
 // CHECK: void @__llvm_gcov_init() unnamed_addr [[NRZ]]
 
 // CHECK: attributes [[NRZ]] = { {{.*}}noredzone{{.*}} }
diff --git a/clang/test/CodeGen/matrix-type-builtins.c b/clang/test/CodeGen/matrix-type-builtins.c
index 58fde6f01cc34..f7e9587def60a 100644
--- a/clang/test/CodeGen/matrix-type-builtins.c
+++ b/clang/test/CodeGen/matrix-type-builtins.c
@@ -1,5 +1,9 @@
 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
+// Also check we do not crash when running some middle-end passes. Most
+// importantly this includes the IR verifier, to ensure we emit valid IR.
+// RUN: %clang_cc1 -fenable-matrix -emit-llvm -triple x86_64-apple-darwin %s -o %t
+
 // Tests for the matrix type builtins.
 
 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
@@ -100,7 +104,7 @@ void transpose_global() {
 void column_major_load_with_const_stride_double(double *Ptr) {
   // CHECK-LABEL: define void @column_major_load_with_const_stride_double(double* %Ptr)
   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64.p0f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
 
   dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
 }
@@ -108,7 +112,7 @@ void column_major_load_with_const_stride_double(double *Ptr) {
 void column_major_load_with_const_stride2_double(double *Ptr) {
   // CHECK-LABEL: define void @column_major_load_with_const_stride2_double(double* %Ptr)
   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64.p0f64(double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
 
   dx5x5_t m_a2 = __builtin_matrix_column_major_load(Ptr, 5, 5, 2 * 3 + 9);
 }
@@ -117,7 +121,7 @@ void column_major_load_with_variable_stride_ull_float(float *Ptr, unsigned long
   // CHECK-LABEL: define void @column_major_load_with_variable_stride_ull_float(float* %Ptr, i64 %S)
   // CHECK:         [[S:%.*]] = load i64, i64* %S.addr, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = load float*, float** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <6 x float> @llvm.matrix.column.major.load.v6f32.p0f32(float* align 4 [[PTR]], i64 [[S]], i1 false, i32 2, i32 3)
+  // CHECK-NEXT:    call <6 x float> @llvm.matrix.column.major.load.v6f32(float* align 4 [[PTR]], i64 [[S]], i1 false, i32 2, i32 3)
 
   fx2x3_t m_b = __builtin_matrix_column_major_load(Ptr, 2, 3, S);
 }
@@ -128,7 +132,7 @@ void column_major_load_with_stride_math_int(int *Ptr, int S) {
   // CHECK-NEXT:    [[STRIDE:%.*]] = add nsw i32 [[S]], 32
   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <80 x i32> @llvm.matrix.column.major.load.v80i32.p0i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
+  // CHECK-NEXT:    call <80 x i32> @llvm.matrix.column.major.load.v80i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
 
   ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
 }
@@ -140,7 +144,7 @@ void column_major_load_with_stride_math_s_int(int *Ptr, short S) {
   // CHECK-NEXT:    [[STRIDE:%.*]] = add nsw i32 [[S_EXT]], 32
   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32.p0i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
+  // CHECK-NEXT:    %matrix = call <80 x i32> @llvm.matrix.column.major.load.v80i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 4, i32 20)
 
   ix4x20_t m_c = __builtin_matrix_column_major_load(Ptr, 4, 20, S + 32);
 }
@@ -148,7 +152,7 @@ void column_major_load_with_stride_math_s_int(int *Ptr, short S) {
 void column_major_load_array1(double Ptr[25]) {
   // CHECK-LABEL: define void @column_major_load_array1(double* %Ptr)
   // CHECK:         [[ADDR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64.p0f64(double* align 8 [[ADDR]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[ADDR]], i64 5, i1 false, i32 5, i32 5)
 
   dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
 }
@@ -158,7 +162,7 @@ void column_major_load_array2() {
   // CHECK-NEXT:  entry:
   // CHECK-NEXT:    [[PTR:%.*]] = alloca [25 x double], align 16
   // CHECK:         [[ARRAY_DEC:%.*]] = getelementptr inbounds [25 x double], [25 x double]* [[PTR]], i64 0, i64 0
-  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64.p0f64(double* align 16 [[ARRAY_DEC]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 16 [[ARRAY_DEC]], i64 5, i1 false, i32 5, i32 5)
 
   double Ptr[25];
   dx5x5_t m = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
@@ -167,7 +171,7 @@ void column_major_load_array2() {
 void column_major_load_const(const double *Ptr) {
   // CHECK-LABEL: define void @column_major_load_const(double* %Ptr)
   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64.p0f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
 
   dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
 }
@@ -175,7 +179,7 @@ void column_major_load_const(const double *Ptr) {
 void column_major_load_volatile(volatile double *Ptr) {
   // CHECK-LABEL: define void @column_major_load_volatile(double* %Ptr)
   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64.p0f64(double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
+  // CHECK-NEXT:    call <25 x double> @llvm.matrix.column.major.load.v25f64(double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
 
   dx5x5_t m_a1 = __builtin_matrix_column_major_load(Ptr, 5, 5, 5);
 }
@@ -184,7 +188,7 @@ void column_major_store_with_const_stride_double(double *Ptr) {
   // CHECK-LABEL: define void @column_major_store_with_const_stride_double(double* %Ptr)
   // CHECK:         [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64.p0f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
 
   dx5x5_t m;
   __builtin_matrix_column_major_store(m, Ptr, 5);
@@ -194,7 +198,7 @@ void column_major_store_with_const_stride2_double(double *Ptr) {
   // CHECK-LABEL: define void @column_major_store_with_const_stride2_double(double* %Ptr)
   // CHECK:         [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64.p0f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 5, i32 5)
   //
   dx5x5_t m;
   __builtin_matrix_column_major_store(m, Ptr, 2 * 3 + 9);
@@ -207,7 +211,7 @@ void column_major_store_with_stride_math_int(int *Ptr, int S) {
   // CHECK-NEXT:    [[S:%.*]] = load i32, i32* %S.addr, align 4
   // CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[S]], 32
   // CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[ADD]] to i64
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v80i32.p0i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v80i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
 
   ix4x20_t m;
   __builtin_matrix_column_major_store(m, Ptr, S + 32);
@@ -221,7 +225,7 @@ void column_major_store_with_stride_math_s_int(int *Ptr, short S) {
   // CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[S]] to i32
   // CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[EXT]], 2
   // CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[ADD]] to i64
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v80i32.p0i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v80i32(<80 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX]], i1 false, i32 4, i32 20)
 
   ix4x20_t m;
   __builtin_matrix_column_major_store(m, Ptr, S + 2);
@@ -231,7 +235,7 @@ void column_major_store_array1(double Ptr[25]) {
   // CHECK-LABEL: define void @column_major_store_array1(double* %Ptr)
   // CHECK:         [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64.p0f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 false, i32 5, i32 5)
 
   dx5x5_t m;
   __builtin_matrix_column_major_store(m, Ptr, 5);
@@ -241,7 +245,7 @@ void column_major_store_array2() {
   // CHECK-LABEL: define void @column_major_store_array2()
   // CHECK:         [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [25 x double], [25 x double]* %Ptr, i64 0, i64 0
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64.p0f64(<25 x double> [[M]], double* align 16 [[PTR]], i64 5, i1 false, i32 5, i32 5)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 16 [[PTR]], i64 5, i1 false, i32 5, i32 5)
 
   double Ptr[25];
   dx5x5_t m;
@@ -252,7 +256,7 @@ void column_major_store_volatile(volatile double *Ptr) {
   // CHECK-LABEL: define void @column_major_store_volatile(double* %Ptr) #0 {
   // CHECK:         [[M:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64.p0f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v25f64(<25 x double> [[M]], double* align 8 [[PTR]], i64 5, i1 true, i32 5, i32 5)
 
   dx5x5_t m;
   __builtin_matrix_column_major_store(m, Ptr, 5);
diff --git a/clang/test/CodeGen/ppc-aggregate-abi.cpp b/clang/test/CodeGen/ppc-aggregate-abi.cpp
new file mode 100644
index 0000000000000..94afb6ab9e176
--- /dev/null
+++ b/clang/test/CodeGen/ppc-aggregate-abi.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -x c++ \
+// RUN:   -o - %s | FileCheck %s -check-prefix=CHECK-BE
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -x c++ \
+// RUN:   -o - %s | FileCheck %s -check-prefix=CHECK-LE
+
+class agg_float_class { float a; };
+// CHECK-BE-LABEL: define void @_Z20pass_agg_float_class15agg_float_class(%class.agg_float_class* noalias sret align 4 %{{.*}}, float inreg %{{.*}})
+// CHECK-LE-LABEL: define [1 x float] @_Z20pass_agg_float_class15agg_float_class(float inreg %{{.*}})
+agg_float_class pass_agg_float_class(agg_float_class arg) { return arg; }
+
+class agg_double_class { double a; };
+// CHECK-BE-LABEL: define void @_Z21pass_agg_double_class16agg_double_class(%class.agg_double_class* noalias sret align 8 %{{.*}}, double inreg %{{.*}})
+// CHECK-LE-LABEL: define [1 x double] @_Z21pass_agg_double_class16agg_double_class(double inreg %{{.*}})
+agg_double_class pass_agg_double_class(agg_double_class arg) { return arg; }
+
+struct agg_float_cpp { float a; int : 0; };
+// CHECK-BE-LABEL: define void @_Z18pass_agg_float_cpp13agg_float_cpp(%struct.agg_float_cpp* noalias sret align 4 %{{.*}}, float inreg %{{.*}})
+// CHECK-LE-LABEL: define [1 x float] @_Z18pass_agg_float_cpp13agg_float_cpp(float inreg %{{.*}})
+agg_float_cpp pass_agg_float_cpp(agg_float_cpp arg) { return arg; }
+
+struct empty { };
+struct agg_nofloat_empty { float a; empty dummy; };
+// CHECK-BE-LABEL: define void @_Z22pass_agg_nofloat_empty17agg_nofloat_empty(%struct.agg_nofloat_empty* noalias sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-LE-LABEL: define i64 @_Z22pass_agg_nofloat_empty17agg_nofloat_empty(i64 %{{.*}})
+agg_nofloat_empty pass_agg_nofloat_empty(agg_nofloat_empty arg) { return arg; }
+
+struct agg_float_empty { float a; [[no_unique_address]] empty dummy; };
+// CHECK-BE-LABEL: define void @_Z20pass_agg_float_empty15agg_float_empty(%struct.agg_float_empty* noalias sret align 4 %{{.*}}, float inreg %{{.*}})
+// CHECK-LE-LABEL: define [1 x float] @_Z20pass_agg_float_empty15agg_float_empty(float inreg %{{.*}})
+agg_float_empty pass_agg_float_empty(agg_float_empty arg) { return arg; }
+
+struct agg_nofloat_emptyarray { float a; [[no_unique_address]] empty dummy[3]; };
+// CHECK-BE-LABEL: define void @_Z27pass_agg_nofloat_emptyarray22agg_nofloat_emptyarray(%struct.agg_nofloat_emptyarray* noalias sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-LE-LABEL: define i64 @_Z27pass_agg_nofloat_emptyarray22agg_nofloat_emptyarray(i64 %{{.*}})
+agg_nofloat_emptyarray pass_agg_nofloat_emptyarray(agg_nofloat_emptyarray arg) { return arg; }
+
+struct noemptybase { empty dummy; };
+struct agg_nofloat_emptybase : noemptybase { float a; };
+// CHECK-BE-LABEL: define void @_Z26pass_agg_nofloat_emptybase21agg_nofloat_emptybase(%struct.agg_nofloat_emptybase* noalias sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-LE-LABEL: define i64 @_Z26pass_agg_nofloat_emptybase21agg_nofloat_emptybase(i64 %{{.*}})
+agg_nofloat_emptybase pass_agg_nofloat_emptybase(agg_nofloat_emptybase arg) { return arg; }
+
+struct emptybase { [[no_unique_address]] empty dummy; };
+struct agg_float_emptybase : emptybase { float a; };
+// CHECK-BE-LABEL: define void @_Z24pass_agg_float_emptybase19agg_float_emptybase(%struct.agg_float_emptybase* noalias sret align 4 %{{.*}}, float inreg %{{.*}})
+// CHECK-LE-LABEL: define [1 x float] @_Z24pass_agg_float_emptybase19agg_float_emptybase(float inreg %{{.*}})
+agg_float_emptybase pass_agg_float_emptybase(agg_float_emptybase arg) { return arg; }
+
+struct noemptybasearray { [[no_unique_address]] empty dummy[3]; };
+struct agg_nofloat_emptybasearray : noemptybasearray { float a; };
+// CHECK-BE-LABEL: define void @_Z31pass_agg_nofloat_emptybasearray26agg_nofloat_emptybasearray(%struct.agg_nofloat_emptybasearray* noalias sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-LE-LABEL: define i64 @_Z31pass_agg_nofloat_emptybasearray26agg_nofloat_emptybasearray(i64 %{{.*}})
+agg_nofloat_emptybasearray pass_agg_nofloat_emptybasearray(agg_nofloat_emptybasearray arg) { return arg; }
+
+// CHECK-BE: call void @_Z24pass_agg_float_emptybase19agg_float_emptybase(%struct.agg_float_emptybase* sret align 4 %{{.*}}, float inreg %{{.*}})
+// CHECK-LE: call [1 x float] @_Z24pass_agg_float_emptybase19agg_float_emptybase(float inreg %{{.*}})
+void pass_agg_float_emptybase_ptr(agg_float_emptybase* arg) { pass_agg_float_emptybase(*arg); }
+// CHECK-BE: call void @_Z26pass_agg_nofloat_emptybase21agg_nofloat_emptybase(%struct.agg_nofloat_emptybase* sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-LE: call i64 @_Z26pass_agg_nofloat_emptybase21agg_nofloat_emptybase(i64 %{{.*}})
+void pass_agg_nofloat_emptybase_ptr(agg_nofloat_emptybase* arg) { pass_agg_nofloat_emptybase(*arg); }
diff --git a/clang/test/CodeGen/windows-seh-filter-inFinally.c b/clang/test/CodeGen/windows-seh-filter-inFinally.c
new file mode 100644
index 0000000000000..f9dfca14f0209
--- /dev/null
+++ b/clang/test/CodeGen/windows-seh-filter-inFinally.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-windows -fms-extensions -Wno-implicit-function-declaration -S -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: %[[dst:[0-9-]+]] = call i8* @llvm.eh.recoverfp(i8* bitcast (void (i8, i8*)* @"?fin$0@0@main@@" to i8*), i8* %frame_pointer)
+// CHECK-NEXT: %[[dst1:[0-9-]+]] = call i8* @llvm.localrecover(i8* bitcast (void (i8, i8*)* @"?fin$0@0@main@@" to i8*), i8* %[[dst]], i32 0)
+// CHECK-NEXT: %[[dst2:[0-9-]+]] = bitcast i8* %[[dst1]] to i8**
+// CHECK-NEXT: = load i8*, i8** %[[dst2]], align 8
+
+int
+main(int argc, char *argv[])
+{
+    int Counter = 0;
+    //
+    // Try/except within the finally clause of a try/finally.
+    //
+    __try {
+      Counter -= 1;
+    }
+    __finally {
+      __try {
+        Counter += 2;
+        // RtlRaiseStatus(STATUS_INTEGER_OVERFLOW);
+      } __except(Counter) {
+        __try {
+          Counter += 3;
+        }
+        __finally {
+          if (abnormal_termination() == 1) {
+            Counter += 5;
+          }
+        }
+      }
+    }
+    // expect Counter == 9
+    return 1;
+}
+
diff --git a/clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp b/clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp
new file mode 100644
index 0000000000000..4cec83d461ade
--- /dev/null
+++ b/clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp
@@ -0,0 +1,14 @@
+// RUN: not %clang_cc1 -triple powerpc-ibm-aix-xcoff -S -emit-llvm -x c++ \
+// RUN:     -fregister-global-dtors-with-atexit < %s 2>&1 | \
+// RUN:   FileCheck %s
+
+// RUN: not %clang_cc1 -triple powerpc64-ibm-aix-xcoff -S -emit-llvm -x c++ \
+// RUN:     -fregister-global-dtors-with-atexit < %s 2>&1 | \
+// RUN:   FileCheck %s
+
+struct T {
+  T();
+  ~T();
+} t;
+
+// CHECK: error in backend: register global dtors with atexit() is not supported yet
diff --git a/clang/test/CodeGenCXX/aix-static-init-debug-info.cpp b/clang/test/CodeGenCXX/aix-static-init-debug-info.cpp
new file mode 100644
index 0000000000000..39de0cdd513ae
--- /dev/null
+++ b/clang/test/CodeGenCXX/aix-static-init-debug-info.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -emit-llvm -x c++ \
+// RUN:     -debug-info-kind=limited < %s | \
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK64 %s
+
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -emit-llvm -x c++ \
+// RUN:     -debug-info-kind=limited  < %s | \
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK64 %s
+
+struct X {
+  X();
+  ~X();
+};
+
+X v;
+
+// CHECK: define internal void @__cxx_global_var_init() [[ATTR:#[0-9]+]] !dbg ![[DBGVAR16:[0-9]+]] {
+// CHECK: entry:
+// CHECK:   call void @_ZN1XC1Ev(%struct.X* @v), !dbg ![[DBGVAR19:[0-9]+]]
+// CHECK:   %0 = call i32 @atexit(void ()* @__dtor_v) [[ATTR:#[0-9]+]], !dbg ![[DBGVAR19]]
+// CHECK:   ret void, !dbg ![[DBGVAR19]]
+// CHECK: }
+
+// CHECK: define internal void @__dtor_v() [[ATTR:#[0-9]+]] !dbg ![[DBGVAR20:[0-9]+]] {
+// CHECK: entry:
+// CHECK:   call void @_ZN1XD1Ev(%struct.X* @v), !dbg ![[DBGVAR21:[0-9]+]]
+// CHECK:   ret void, !dbg ![[DBGVAR21]]
+// CHECK: }
+
+// CHECK: define internal void @__finalize_v() [[ATTR:#[0-9]+]] !dbg ![[DBGVAR22:[0-9]+]] {
+// CHECK: entry:
+// CHECK:   %0 = call i32 @unatexit(void ()* @__dtor_v) [[ATTR:#[0-9]+]], !dbg ![[DBGVAR24:[0-9]+]]
+// CHECK:   %needs_destruct = icmp eq i32 %0, 0, !dbg ![[DBGVAR24]]
+// CHECK:   br i1 %needs_destruct, label %destruct.call, label %destruct.end, !dbg ![[DBGVAR24]]
+
+// CHECK: destruct.call:
+// CHECK:   call void @__dtor_v(), !dbg ![[DBGVAR24]]
+// CHECK:   br label %destruct.end, !dbg ![[DBGVAR24]]
+
+// CHECK: destruct.end:
+// CHECK:   ret void, !dbg ![[DBGVAR24]]
+// CHECK: }
+
+// CHECK: define void @__sinit80000000_clang_c3236cbaa79f2bae3a15e6379a05f625() [[ATTR:#[0-9]+]] !dbg ![[DBGVAR25:[0-9]+]] {
+// CHECK: entry:
+// CHECK:   call void @__cxx_global_var_init(), !dbg ![[DBGVAR26:[0-9]+]]
+// CHECK:   ret void
+// CHECK: }
+
+// CHECK: define void @__sterm80000000_clang_c3236cbaa79f2bae3a15e6379a05f625() [[ATTR:#[0-9]+]] !dbg ![[DBGVAR27:[0-9]+]] {
+// CHECK: entry:
+// CHECK:   call void @__finalize_v(), !dbg ![[DBGVAR28:[0-9]+]]
+// CHECK:   ret void
+// CHECK: }
+
+// CHECK: ![[DBGVAR16]] = distinct !DISubprogram(name: "__cxx_global_var_init", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 14, type: !{{[0-9]+}}, scopeLine: 14, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !{{[0-9]+}}, retainedNodes: !{{[0-9]+}})
+// CHECK: ![[DBGVAR19]] = !DILocation(line: 14, column: 3, scope: ![[DBGVAR16]])
+// CHECK: ![[DBGVAR20]] = distinct !DISubprogram(name: "__dtor_v", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 14, type: !{{[0-9]+}}, scopeLine: 14, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !{{[0-9]+}}, retainedNodes: !{{[0-9]+}})
+// CHECK: ![[DBGVAR21]] = !DILocation(line: 14, column: 3, scope: ![[DBGVAR20]])
+// CHECK: ![[DBGVAR22]] = distinct !DISubprogram(linkageName: "__finalize_v", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 14, type: !{{[0-9]+}}, scopeLine: 14, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !{{[0-9]+}}, retainedNodes: !{{[0-9]+}})
+// CHECK: ![[DBGVAR24]] = !DILocation(line: 14, column: 3, scope: ![[DBGVAR22]])
+// CHECK: ![[DBGVAR25]] = distinct !DISubprogram(linkageName: "__sinit80000000_clang_c3236cbaa79f2bae3a15e6379a05f625", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, type: !{{[0-9]+}}, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !{{[0-9]+}}, retainedNodes: !{{[0-9]+}})
+// CHECK: ![[DBGVAR26]] = !DILocation(line: 0, scope: ![[DBGVAR25]])
+// CHECK: ![[DBGVAR27]] = distinct !DISubprogram(linkageName: "__sterm80000000_clang_c3236cbaa79f2bae3a15e6379a05f625", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, type: !{{[0-9]+}}, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !{{[0-9]+}}, retainedNodes: !{{[0-9]+}})
+// CHECK: ![[DBGVAR28]] = !DILocation(line: 0, scope: ![[DBGVAR27]])
diff --git a/clang/test/CodeGenCXX/debug-info-class.cpp b/clang/test/CodeGenCXX/debug-info-class.cpp
index b3e79c37923dd..94d5a0f1f0820 100644
--- a/clang/test/CodeGenCXX/debug-info-class.cpp
+++ b/clang/test/CodeGenCXX/debug-info-class.cpp
@@ -13,7 +13,7 @@ class B {
   virtual ~B();
 };
 
-B::~B() {
+B::~B() { extern void mayThrow(); mayThrow();
 }
 
 struct C {
diff --git a/clang/test/CodeGenCXX/matrix-type-builtins.cpp b/clang/test/CodeGenCXX/matrix-type-builtins.cpp
index 314168701793b..dd341d2cf62cd 100644
--- a/clang/test/CodeGenCXX/matrix-type-builtins.cpp
+++ b/clang/test/CodeGenCXX/matrix-type-builtins.cpp
@@ -94,7 +94,7 @@ void test_column_major_load_with_stride_template_double(double *Ptr) {
 
   // CHECK-LABEL:  define linkonce_odr <40 x double> @_Z29column_major_load_with_strideIdLj10ELj4ELj15EEU11matrix_typeXT0_EXT1_ET_PS0_(double* %Ptr)
   // CHECK:         [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <40 x double> @llvm.matrix.column.major.load.v40f64.p0f64(double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
+  // CHECK-NEXT:    call <40 x double> @llvm.matrix.column.major.load.v40f64(double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
 
   matrix_t M1 = column_major_load_with_stride(Ptr);
 }
@@ -106,7 +106,7 @@ void test_column_major_load_with_stride_template_int(int *Ptr) {
 
   // CHECK-LABEL: define linkonce_odr <6 x i32> @_Z29column_major_load_with_strideIiLj3ELj2ELj12EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* %Ptr)
   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <6 x i32> @llvm.matrix.column.major.load.v6i32.p0i32(i32* align 4 [[PTR]], i64 12, i1 false, i32 3, i32 2)
+  // CHECK-NEXT:    call <6 x i32> @llvm.matrix.column.major.load.v6i32(i32* align 4 [[PTR]], i64 12, i1 false, i32 3, i32 2)
 
   matrix_t M1 = column_major_load_with_stride(Ptr);
 }
@@ -124,7 +124,7 @@ void test_column_major_load_stride_wrapper(int *Ptr, UnsignedWrapper &W) {
   // CHECK-NEXT:    [[STRIDE:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* [[W]])
   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = zext i32 [[STRIDE]] to i64
   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <4 x i32> @llvm.matrix.column.major.load.v4i32.p0i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
+  // CHECK-NEXT:    call <4 x i32> @llvm.matrix.column.major.load.v4i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
   matrix_t M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, W);
 }
 
@@ -133,7 +133,7 @@ constexpr int constexpr3() { return 3; }
 void test_column_major_load_constexpr_num_rows(int *Ptr) {
   // CHECK-LABEL: define void @_Z41test_column_major_load_constexpr_num_rowsPi(i32* %Ptr)
   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <6 x i32> @llvm.matrix.column.major.load.v6i32.p0i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
+  // CHECK-NEXT:    call <6 x i32> @llvm.matrix.column.major.load.v6i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
 
   matrix_t M1 = __builtin_matrix_column_major_load(Ptr, constexpr3(), 2, 3);
 }
@@ -143,7 +143,7 @@ constexpr int constexpr1() { return 1; }
 void test_column_major_load_constexpr_num_columns(int *Ptr) {
   // CHECK-LABEL: define void @_Z44test_column_major_load_constexpr_num_columnsPi(i32* %Ptr)
   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <2 x i32> @llvm.matrix.column.major.load.v2i32.p0i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 1)
+  // CHECK-NEXT:    call <2 x i32> @llvm.matrix.column.major.load.v2i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 1)
   matrix_t M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr1(), 3);
 }
 
@@ -153,7 +153,7 @@ constexpr int constexpr_plus1() { return N + 1; }
 void test_column_major_load_constexpr_num_columns_temp(int *Ptr) {
   // CHECK-LABEL:  define void @_Z49test_column_major_load_constexpr_num_columns_tempPi(i32* %Ptr)
   // CHECK:         [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <10 x i32> @llvm.matrix.column.major.load.v10i32.p0i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 5)
+  // CHECK-NEXT:    call <10 x i32> @llvm.matrix.column.major.load.v10i32(i32* align 4 [[PTR]], i64 3, i1 false, i32 2, i32 5)
   matrix_t M1 = __builtin_matrix_column_major_load(Ptr, 2, constexpr_plus1<4>(), 3);
 }
 
@@ -162,7 +162,7 @@ void test_column_major_load_constexpr_stride_constexpr(int *Ptr) {
   // CHECK:         [[STRIDE:%.*]] = call i32 @_Z10constexpr3v()
   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call <4 x i32> @llvm.matrix.column.major.load.v4i32.p0i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
+  // CHECK-NEXT:    call <4 x i32> @llvm.matrix.column.major.load.v4i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 2, i32 2)
 
   matrix_t M1 = __builtin_matrix_column_major_load(Ptr, 2, 2, constexpr3());
 }
@@ -200,7 +200,7 @@ void test_column_major_store_with_stride_template_double(double *Ptr) {
   // CHECK-LABEL:  define linkonce_odr void @_Z30column_major_store_with_strideIdLj10ELj4ELj15EEvRU11matrix_typeXT0_EXT1_ET_PS0_([40 x double]* nonnull align 8 dereferenceable(320) %m, double* %Ptr)
   // CHECK:         [[M:%.*]] = load <40 x double>, <40 x double>* {{.*}}, align 8
   // CHECK-NEXT:    [[PTR:%.*]] = load double*, double** %Ptr.addr, align 8
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v40f64.p0f64(<40 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v40f64(<40 x double> [[M]], double* align 8 [[PTR]], i64 15, i1 false, i32 10, i32 4)
 
   matrix_t M1;
   column_major_store_with_stride(M1, Ptr);
@@ -214,7 +214,7 @@ void test_column_major_store_with_stride_template_int(int *Ptr) {
   // CHECK-LABEL:  define linkonce_odr void @_Z30column_major_store_with_strideIiLj3ELj2ELj3EEvRU11matrix_typeXT0_EXT1_ET_PS0_([6 x i32]* nonnull align 4 dereferenceable(24) %m, i32* %Ptr)
   // CHECK:         [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4
   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v6i32.p0i32(<6 x i32> [[M]], i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v6i32(<6 x i32> [[M]], i32* align 4 [[PTR]], i64 3, i1 false, i32 3, i32 2)
 
   matrix_t M1;
   column_major_store_with_stride(M1, Ptr);
@@ -227,7 +227,7 @@ void test_column_major_store_stride_wrapper(int *Ptr, UnsignedWrapper &W) {
   // CHECK-NEXT:    [[W:%.*]] = load %struct.UnsignedWrapper*, %struct.UnsignedWrapper** %W.addr, align 8
   // CHECK-NEXT:    [[IDX:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* [[W]])
   // CHECK-NEXT:    [[IDX_EXT:%.*]] = zext i32 [[IDX]] to i64
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v4i32.p0i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v4i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
 
   matrix_t M1;
   __builtin_matrix_column_major_store(M1, Ptr, W);
@@ -239,7 +239,7 @@ void test_column_major_store_constexpr_stride_constexpr(int *Ptr) {
   // CHECK-NEXT:    [[PTR:%.*]] = load i32*, i32** %Ptr.addr, align 8
   // CHECK-NEXT:    [[IDX:%.*]] = call i32 @_Z10constexpr3v()
   // CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[IDX]] to i64
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v4i32.p0i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v4i32(<4 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 2, i32 2)
 
   matrix_t M;
   __builtin_matrix_column_major_store(M, Ptr, constexpr3());
diff --git a/clang/test/CodeGenObjC/exceptions.m b/clang/test/CodeGenObjC/exceptions.m
index 3bb4f86cf0256..55a117bcc3dd5 100644
--- a/clang/test/CodeGenObjC/exceptions.m
+++ b/clang/test/CodeGenObjC/exceptions.m
@@ -25,12 +25,12 @@ void f1() {
     // CHECK-NEXT: icmp
     // CHECK-NEXT: br i1
     @try {
+    // CHECK:      call void asm sideeffect "", "=*m"
     // CHECK:      call void asm sideeffect "", "*m"
     // CHECK-NEXT: call void @foo()
       foo();
     // CHECK:      call void @objc_exception_try_exit
 
-    // CHECK:      call void asm sideeffect "", "=*m"
     } @finally {
       break;
     }
@@ -53,14 +53,6 @@ int f2() {
   // CHECK-NEXT:   [[CAUGHT:%.*]] = icmp eq i32 [[SETJMP]], 0
   // CHECK-NEXT:   br i1 [[CAUGHT]]
   @try {
-    // CHECK: store i32 6, i32* [[X]]
-    x++;
-    // CHECK-NEXT: call void asm sideeffect "", "*m,*m"(i32* nonnull [[X]]
-    // CHECK-NEXT: call void @foo()
-    // CHECK-NEXT: call void @objc_exception_try_exit
-    // CHECK-NEXT: [[T:%.*]] = load i32, i32* [[X]]
-    foo();
-  } @catch (id) {
     // Landing pad.  Note that we elide the re-enter.
     // CHECK:      call void asm sideeffect "", "=*m,=*m"(i32* nonnull [[X]]
     // CHECK-NEXT: call i8* @objc_exception_extract
@@ -69,6 +61,15 @@ int f2() {
 
     // This store is dead.
     // CHECK-NEXT: store i32 [[T2]], i32* [[X]]
+
+    // CHECK: store i32 6, i32* [[X]]
+    x++;
+    // CHECK-NEXT: call void asm sideeffect "", "*m,*m"(i32* nonnull [[X]]
+    // CHECK-NEXT: call void @foo()
+    // CHECK-NEXT: call void @objc_exception_try_exit
+    // CHECK-NEXT: [[T:%.*]] = load i32, i32* [[X]]
+    foo();
+  } @catch (id) {
     x--;
   }
 
@@ -89,23 +90,23 @@ void f3() {
 
   // CHECK:      call void @objc_exception_try_enter(
   // CHECK:      call i32 @_setjmp
-  // CHECK-NEXT: icmp eq
-  // CHECK-NEXT: br i1
+  // CHECK-NEXT: [[DEST1:%.*]] = icmp eq
+  // CHECK-NEXT: br i1 [[DEST1]]
 
   @try {
     // CHECK:    call void @f3_helper(i32 0, i32* nonnull [[X]])
     // CHECK:    call void @objc_exception_try_exit(
     f3_helper(0, &x);
   } @finally {
-    // CHECK:    [[DEST1:%.*]] = phi i1 [ true, {{%.*}} ], [ false, {{%.*}} ]
     // CHECK:    call void @objc_exception_try_enter
     // CHECK:    call i32 @_setjmp
+    // CHECK-NEXT: [[DEST2:%.*]] = icmp eq
+    // CHECK-NEXT: br i1 [[DEST2]]
     @try {
       // CHECK:  call void @f3_helper(i32 1, i32* nonnull [[X]])
       // CHECK:  call void @objc_exception_try_exit(
       f3_helper(1, &x);
     } @finally {
-      // CHECK:  [[DEST2:%.*]] = phi i1 [ true, {{%.*}} ], [ false, {{%.*}} ]
       // CHECK:  call void @f3_helper(i32 2, i32* nonnull [[X]])
       f3_helper(2, &x);
 
diff --git a/clang/test/CodeGenObjC/for-in.m b/clang/test/CodeGenObjC/for-in.m
index 26fe7922aee9f..20e89b33affaf 100644
--- a/clang/test/CodeGenObjC/for-in.m
+++ b/clang/test/CodeGenObjC/for-in.m
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm %s -o %t
+// RUN: %clang_cc1 %s -verify -o /dev/null
+// RUN: %clang_cc1 %s -triple x86_64-apple-darwin -emit-llvm -fsanitize=objc-cast -o - | FileCheck %s
 
 void p(const char*, ...);
 
@@ -18,12 +19,26 @@ -(const char*) cString;
 #define L5(n) L4(n+0),L4(n+16)
 #define L6(n) L5(n+0),L5(n+32)
 
+// CHECK-LABEL: define void @t0
 void t0() {
   NSArray *array = [NSArray arrayWithObjects: L1(0), (void*)0];
 
   p("array.length: %d\n", [array count]);
   unsigned index = 0;
   for (NSString *i in array) {	// expected-warning {{collection expression type 'NSArray *' may not respond}}
+
+    // CHECK:      [[expectedCls:%.*]] = load %struct._class_t*, {{.*}}, !nosanitize
+    // CHECK-NEXT: [[kindOfClassSel:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES{{.*}}, !nosanitize
+    // CHECK-NEXT: [[expectedClsI8:%.*]] = bitcast %struct._class_t* [[expectedCls]] to i8*, !nosanitize
+    // CHECK-NEXT: [[isCls:%.*]] = call zeroext i1 bitcast {{.*}}@objc_msgSend to i1 (i8*, i8*, {{.*}})(i8* [[theItem:%.*]], i8* [[kindOfClassSel]], i8* [[expectedClsI8]]), !nosanitize
+    // CHECK: br i1 [[isCls]]
+
+    // CHECK: ptrtoint i8* [[theItem]] to i64, !nosanitize
+    // CHECK-NEXT: call void @__ubsan_handle_invalid_objc_cast
+    // CHECK-NEXT: unreachable, !nosanitize
+
+    // CHECK: bitcast i8* [[theItem]]
+
     p("element %d: %s\n", index++, [i cString]);
   }
 }
diff --git a/clang/test/CodeGenObjC/matrix-type-builtins.m b/clang/test/CodeGenObjC/matrix-type-builtins.m
index 78dfad262b91a..19bd957bb6821 100644
--- a/clang/test/CodeGenObjC/matrix-type-builtins.m
+++ b/clang/test/CodeGenObjC/matrix-type-builtins.m
@@ -56,7 +56,7 @@ void test_column_major_load(PtrValue *Ptr, IntValue *Stride) {
   // CHECK:         [[STRIDE:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)
   // CHECK-NEXT:    [[STRIDE_EXT:%.*]] = sext i32 [[STRIDE]] to i64
   // CHECK:         [[PTR:%.*]] = call i32* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32* (i8*, i8*)*)
-  // CHECK-NEXT:    call <12 x i32> @llvm.matrix.column.major.load.v12i32.p0i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 3, i32 4)
+  // CHECK-NEXT:    call <12 x i32> @llvm.matrix.column.major.load.v12i32(i32* align 4 [[PTR]], i64 [[STRIDE_EXT]], i1 false, i32 3, i32 4)
 
   u3x4 m = __builtin_matrix_column_major_load(Ptr.value, 3, 4, Stride.value);
 }
@@ -67,7 +67,7 @@ void test_column_major_store(UnsignedMatrixValue *M, PtrValue *Ptr, IntValue *St
   // CHECK:         [[PTR:%.*]] = call i32* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32* (i8*, i8*)*)
   // CHECK:         [[IDX:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)
   // CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[IDX]] to i64
-  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v12i32.p0i32(<12 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 3, i32 4)
+  // CHECK-NEXT:    call void @llvm.matrix.column.major.store.v12i32(<12 x i32> [[M]], i32* align 4 [[PTR]], i64 [[IDX_EXT]], i1 false, i32 3, i32 4)
 
   __builtin_matrix_column_major_store(M.value, Ptr.value, Stride.value);
 }
diff --git a/clang/test/CodeGenObjCXX/arc-list-init-destruct.mm b/clang/test/CodeGenObjCXX/arc-list-init-destruct.mm
index 09a66458c2600..513af64d5203c 100644
--- a/clang/test/CodeGenObjCXX/arc-list-init-destruct.mm
+++ b/clang/test/CodeGenObjCXX/arc-list-init-destruct.mm
@@ -16,6 +16,8 @@ @interface Class0;
 };
 
 bool getBool() {
+  extern void mayThrow();
+  mayThrow();
   return false;
 }
 
diff --git a/clang/test/CodeGenObjCXX/exceptions-legacy.mm b/clang/test/CodeGenObjCXX/exceptions-legacy.mm
index bfc8d640b7104..563569478679b 100644
--- a/clang/test/CodeGenObjCXX/exceptions-legacy.mm
+++ b/clang/test/CodeGenObjCXX/exceptions-legacy.mm
@@ -63,18 +63,20 @@ void test1(id obj, bool *failed) {
 //   Body.
 // CHECK:      invoke void @_Z3foov()
 
+//   Catch handler.  Reload of 'failed' address is unnecessary.
+// CHECK:      [[T0:%.*]] = load i8*, i8**
+// CHECK-NEXT: store i8 1, i8* [[T0]],
+// CHECK-NEXT: br label
+
 //   Leave the @try.
 // CHECK:      call void @objc_exception_try_exit([[BUF_T]]* nonnull [[BUF]])
 // CHECK-NEXT: br label
 // CHECK:      ret void
 
+
 //   Real EH cleanup.
 // CHECK:      [[T0:%.*]] = landingpad
 // CHECK-NEXT:    cleanup
 // CHECK-NEXT: call void @objc_exception_try_exit([[BUF_T]]* nonnull [[BUF]])
 // CHECK-NEXT: resume
 
-//   Catch handler.  Reload of 'failed' address is unnecessary.
-// CHECK:      [[T0:%.*]] = load i8*, i8**
-// CHECK-NEXT: store i8 1, i8* [[T0]],
-// CHECK-NEXT: br label
diff --git a/clang/test/CodeGenObjCXX/os_log.mm b/clang/test/CodeGenObjCXX/os_log.mm
index b6e0bc25ca807..c9efe329b587b 100644
--- a/clang/test/CodeGenObjCXX/os_log.mm
+++ b/clang/test/CodeGenObjCXX/os_log.mm
@@ -6,13 +6,14 @@
   void release(int *lock);
 
   // CHECK-LABEL: define {{.*}} @_ZN13no_eh_cleanup3logERiPcS1_(
+  // CHECK: call void @__os_log_helper_1_2_2_4_0_8_34(
+
   void log(int &i, char *data, char *buf) {
       int lock __attribute__((cleanup(release)));
       __builtin_os_log_format(buf, "%d %{public}s", i, data);
   }
 
-  // An `invoke` of a `nounwind` callee is simplified to a direct
-  // call by an optimization in llvm. Just check that we emit `nounwind`.
+  // Check that the os_log_helper is marked `nounwind`.
   // CHECK: define {{.*}} @__os_log_helper_1_2_2_4_0_8_34({{.*}} [[NUW:#[0-9]+]]
 }
 
diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
index ad47607beb173..9471f93a42dc2 100644
--- a/clang/test/CodeGenOpenCL/convergent.cl
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -70,7 +70,8 @@ void test_merge_if(int a) {
 // CHECK-NOT: call spir_func void @g()
 // CHECK: br label %[[if_end]]
 // CHECK: [[if_end]]:
-// CHECK:  %[[tobool_not_pr:.+]] = phi i1 [ true, %{{.+}} ], [ false, %[[if_then]] ]
+// FIXME: SimplifyCFG is being stupid inserting this Phi. It is not supposed to be here.
+// CHECK:  %[[tobool_not_pr:.+]] = phi i1
 // CHECK:  tail call spir_func void @convfun() #[[attr4:.+]]
 // CHECK:  br i1 %[[tobool_not_pr]], label %[[if_end3:.+]], label %[[if_then2:.+]]
 // CHECK: [[if_then2]]:
diff --git a/clang/test/CodeGenSYCL/noexcept.cpp b/clang/test/CodeGenSYCL/noexcept.cpp
index c1299229ef92f..816f2c43ebfe2 100644
--- a/clang/test/CodeGenSYCL/noexcept.cpp
+++ b/clang/test/CodeGenSYCL/noexcept.cpp
@@ -18,15 +18,15 @@ void f3() {}
 
 void foo_noexcept() noexcept {
   // CHECK-DEVICE: call spir_func void @_Z2f1v()
-  // CHECK-HOST-LIN: invoke void @_Z2f1v()
-  // CHECK-HOST-WIN: invoke void @"?f1@@YAXXZ"()
+  // CHECK-HOST-LIN: call void @_Z2f1v()
+  // CHECK-HOST-WIN: call void @"?f1@@YAXXZ"()
   f1();
 }
 
 void foo_throw() throw() {
   // CHECK-DEVICE: call spir_func void @_Z2f2v()
-  // CHECK-HOST-LIN: invoke void @_Z2f2v()
-  // CHECK-HOST-WIN: invoke void @"?f3@@YAXXZ"()
+  // CHECK-HOST-LIN: call void @_Z2f2v()
+  // CHECK-HOST-WIN: call void @"?f3@@YAXXZ"()
   f2();
 }
 
@@ -38,14 +38,13 @@ struct A {
 void foo_cleanup() {
   A a;
   // CHECK-DEVICE: call spir_func void @_Z2f3v()
-  // CHECK-HOST: invoke void @_Z2f3v()
+  // CHECK-HOST: call void @_Z2f3v()
   f3();
   // CHECK-DEVICE: call spir_func void @_ZN1AD1Ev
   // Regular + exception cleanup
   // CHECK-HOST-LIN: call void @_ZN1AD1Ev
-  // CHECK-HOST-LIN: call void @_ZN1AD1Ev
+  // CHECK-HOST-LIN: call void @_ZN1AD2Ev
   // CHECK-HOST-WIN: call void @"??1A@@QEAA@XZ"(%struct.A* %a)
-  // CHECK-HOST-WIN: call void @"??1A@@QEAA@XZ"(%struct.A* %a) #4 [ "funclet"(token %0) ]
 }
 
 template 
diff --git a/clang/test/Driver/B-opt.c b/clang/test/Driver/B-opt.c
index 5e5ff42fd0956..df85dee4b7040 100644
--- a/clang/test/Driver/B-opt.c
+++ b/clang/test/Driver/B-opt.c
@@ -1,9 +1,10 @@
 // Check -B driver option.
-//
+
+/// Target triple prefix is not detected for -B.
 // RUN: %clang %s -### -o %t.o -target i386-unknown-linux \
 // RUN:     -B %S/Inputs/B_opt_tree/dir1 -fuse-ld=ld 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-B-OPT-TRIPLE %s
-// CHECK-B-OPT-TRIPLE: "{{.*}}/Inputs/B_opt_tree/dir1{{/|\\\\}}i386-unknown-linux-ld"
+// CHECK-B-OPT-TRIPLE-NOT: "{{.*}}/Inputs/B_opt_tree/dir1{{/|\\\\}}i386-unknown-linux-ld"
 //
 // RUN: %clang %s -### -o %t.o -target i386-unknown-linux \
 // RUN:     -B %S/Inputs/B_opt_tree/dir2 -fuse-ld=ld 2>&1 \
diff --git a/clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/armv7-windows-itanium-ld b/clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/ld
similarity index 100%
rename from clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/armv7-windows-itanium-ld
rename to clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/ld
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtbegin.o
similarity index 100%
rename from clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtbegin.o
rename to clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtbegin.o
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtbegin_no_eh.o
similarity index 100%
rename from clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430/crtend.o
rename to clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtbegin_no_eh.o
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/430/crtn.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtend.o
similarity index 100%
rename from clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/430/crtn.o
rename to clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtend.o
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/exceptions/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/exceptions/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtbegin.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtbegin_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtbegin_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtend.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtend_no_eh.o b/clang/test/Driver/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/large/full-memory-range/exceptions/crtend_no_eh.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/include/stdio.h b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/include/stdio.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/430/exceptions/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/430/exceptions/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/exceptions/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/exceptions/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/exceptions/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/exceptions/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/full-memory-range/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/full-memory-range/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/full-memory-range/exceptions/crt0.o b/clang/test/Driver/Inputs/basic_msp430_tree/msp430-elf/lib/large/full-memory-range/exceptions/crt0.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/aarch64-sve-vector-bits.c b/clang/test/Driver/aarch64-sve-vector-bits.c
new file mode 100644
index 0000000000000..c3d0d05bb9b6d
--- /dev/null
+++ b/clang/test/Driver/aarch64-sve-vector-bits.c
@@ -0,0 +1,63 @@
+// -----------------------------------------------------------------------------
+// Tests for the -msve-vector-bits flag
+// -----------------------------------------------------------------------------
+
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=128 2>&1 | FileCheck --check-prefix=CHECK-128 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=256 2>&1 | FileCheck --check-prefix=CHECK-256 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=512 2>&1 | FileCheck --check-prefix=CHECK-512 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=1024 2>&1 | FileCheck --check-prefix=CHECK-1024 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=2048 2>&1 | FileCheck --check-prefix=CHECK-2048 %s
+
+// CHECK-128: "-msve-vector-bits=128"
+// CHECK-256: "-msve-vector-bits=256"
+// CHECK-512: "-msve-vector-bits=512"
+// CHECK-1024: "-msve-vector-bits=1024"
+// CHECK-2048: "-msve-vector-bits=2048"
+
+// Bail out if -msve-vector-bits is specified without SVE enabled
+// -----------------------------------------------------------------------------
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -msve-vector-bits=128 \
+// RUN:  2>&1 | FileCheck --check-prefix=CHECK-NO-SVE-ERROR %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -msve-vector-bits=256 \
+// RUN:  2>&1 | FileCheck --check-prefix=CHECK-NO-SVE-ERROR %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -msve-vector-bits=512 \
+// RUN:  2>&1 | FileCheck --check-prefix=CHECK-NO-SVE-ERROR %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -msve-vector-bits=1024 \
+// RUN:  2>&1 | FileCheck --check-prefix=CHECK-NO-SVE-ERROR %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -msve-vector-bits=2048 \
+// RUN:  2>&1 | FileCheck --check-prefix=CHECK-NO-SVE-ERROR %s
+
+// CHECK-NO-SVE-ERROR: error: '-msve-vector-bits' is not supported without SVE enabled
+
+// Error out if an unsupported value is passed to -msve-vector-bits.
+// -----------------------------------------------------------------------------
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=64 2>&1 | FileCheck --check-prefix=CHECK-BAD-VALUE-ERROR %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=A 2>&1 | FileCheck --check-prefix=CHECK-BAD-VALUE-ERROR %s
+
+// CHECK-BAD-VALUE-ERROR: error: unsupported argument '{{.*}}' to option 'msve-vector-bits='
+
+// Error if using attribute without -msve-vector-bits
+// -----------------------------------------------------------------------------
+// RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
+// RUN:  -march=armv8-a+sve 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+
+typedef __SVInt32_t svint32_t;
+typedef svint32_t noflag __attribute__((arm_sve_vector_bits(256)));
+
+// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is not supported when '-msve-vector-bits=' is not specified
+
+// Error if attribute vector size != -msve-vector-bits
+// -----------------------------------------------------------------------------
+// RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
+// RUN:  -march=armv8-a+sve -msve-vector-bits=128 2>&1 | FileCheck --check-prefix=CHECK-BAD-VECTOR-SIZE-ERROR %s
+
+typedef svint32_t bad_vector_size __attribute__((arm_sve_vector_bits(256)));
+
+// CHECK-BAD-VECTOR-SIZE-ERROR: error: invalid SVE vector size '256', must match value set by '-msve-vector-bits' ('128')
diff --git a/clang/test/Driver/cxa-atexit.cpp b/clang/test/Driver/cxa-atexit.cpp
index e81af6cd5963d..537a11a35f51b 100644
--- a/clang/test/Driver/cxa-atexit.cpp
+++ b/clang/test/Driver/cxa-atexit.cpp
@@ -36,6 +36,7 @@
 // RUN: FileCheck --check-prefix=WITHATEXIT %s
 // RUN: %clang -target x86_64-apple-darwin -c -mkernel -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
+
 // RUN: %clang -target x86_64-pc-linux-gnu -fregister-global-dtors-with-atexit -fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
 // RUN: %clang -target x86_64-pc-linux-gnu -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \
@@ -43,5 +44,18 @@
 // RUN: %clang -target x86_64-pc-linux-gnu -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
 
+// RUN: %clang -target powerpc-ibm-aix-xcoff -fregister-global-dtors-with-atexit -fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
+// RUN: %clang -target powerpc-ibm-aix-xcoff -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHATEXIT %s
+// RUN: %clang -target powerpc-ibm-aix-xcoff -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
+// RUN: %clang -target powerpc64-ibm-aix-xcoff -fregister-global-dtors-with-atexit -fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
+// RUN: %clang -target powerpc64-ibm-aix-xcoff -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHATEXIT %s
+// RUN: %clang -target powerpc64-ibm-aix-xcoff -c -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
+
 // WITHATEXIT: -fregister-global-dtors-with-atexit
 // WITHOUTATEXIT-NOT: -fregister-global-dtors-with-atexit
diff --git a/clang/test/Driver/darwin-ld.c b/clang/test/Driver/darwin-ld.c
index 3fc0556a2bde9..ea71142e88c19 100644
--- a/clang/test/Driver/darwin-ld.c
+++ b/clang/test/Driver/darwin-ld.c
@@ -351,7 +351,8 @@
 // RUN: FileCheck -check-prefix=GCOV_EXPORT %s < %t.log
 // RUN: %clang -target x86_64-apple-darwin12 -fprofile-arcs -Xlinker -exported_symbols_list -Xlinker /dev/null -### %t.o 2> %t.log
 // RUN: FileCheck -check-prefix=GCOV_EXPORT %s < %t.log
-// GCOV_EXPORT: "-exported_symbol" "___gcov_flush"
+// GCOV_EXPORT: "-exported_symbol" "___gcov_dump"
+// GCOV_EXPORT: "-exported_symbol" "___gcov_reset"
 //
 // Check that we can pass the outliner down to the linker.
 // RUN: env IPHONEOS_DEPLOYMENT_TARGET=7.0 \
diff --git a/clang/test/Driver/fuse-ld.c b/clang/test/Driver/fuse-ld.c
index 13e709ccfdfa4..f2ca9fb36194e 100644
--- a/clang/test/Driver/fuse-ld.c
+++ b/clang/test/Driver/fuse-ld.c
@@ -31,23 +31,21 @@
 // RUN:   | FileCheck %s -check-prefix=CHECK-FREEBSD-PLIB
 // CHECK-FREEBSD-PLIB: error: invalid linker name
 
-
-
 // RUN: %clang %s -### -fuse-ld=ld \
 // RUN:     -target arm-linux-androideabi \
-// RUN:     -B%S/Inputs/basic_android_tree/bin 2>&1 \
+// RUN:     -B%S/Inputs/basic_android_tree/bin/arm-linux-androideabi- 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-ANDROID-ARM-LD
 // CHECK-ANDROID-ARM-LD: Inputs/basic_android_tree/bin{{/|\\+}}arm-linux-androideabi-ld
 
 // RUN: %clang %s -### -fuse-ld=bfd \
 // RUN:     -target arm-linux-androideabi \
-// RUN:     -B%S/Inputs/basic_android_tree/bin 2>&1 \
+// RUN:     -B%S/Inputs/basic_android_tree/bin/arm-linux-androideabi- 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=CHECK-ANDROID-ARM-BFD
 // CHECK-ANDROID-ARM-BFD: Inputs/basic_android_tree/bin{{/|\\+}}arm-linux-androideabi-ld.bfd
 
 // RUN: %clang %s -### -fuse-ld=gold \
 // RUN:     -target arm-linux-androideabi \
-// RUN:     -B%S/Inputs/basic_android_tree/bin 2>&1 \
+// RUN:     -B%S/Inputs/basic_android_tree/bin/arm-linux-androideabi- 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=CHECK-ANDROID-ARM-GOLD
 // CHECK-ANDROID-ARM-GOLD: Inputs/basic_android_tree/bin{{/|\\+}}arm-linux-androideabi-ld.gold
 
diff --git a/clang/test/Driver/hip-version.hip b/clang/test/Driver/hip-version.hip
index cf80ae15ac6da..eb1295210cfc1 100644
--- a/clang/test/Driver/hip-version.hip
+++ b/clang/test/Driver/hip-version.hip
@@ -5,6 +5,10 @@
 // RUN: %clang -v --rocm-path=%S/Inputs/rocm 2>&1 \
 // RUN:   | FileCheck -check-prefixes=FOUND %s
 
+// RUN: %clang -v --rocm-path=%S/Inputs/rocm 2>&1 \
+// RUN:   -target amdgcn-amd-amdhsa \
+// RUN:   | FileCheck -check-prefixes=FOUND %s
+
 // FOUND: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
 
 // When --rocm-path is set and .hipVersion is not found, use default version
@@ -12,11 +16,19 @@
 // RUN: %clang -v --rocm-path=%S 2>&1 \
 // RUN:   | FileCheck -check-prefixes=DEFAULT %s
 
+// RUN: %clang -v --rocm-path=%S 2>&1 \
+// RUN:   -target amdgcn-amd-amdhsa \
+// RUN:   | FileCheck -check-prefixes=DEFAULT %s
+
 // DEFAULT: Found HIP installation: {{.*Driver}}, version 3.5.
 
 // RUN: %clang -v --rocm-path=%S --hip-version=3.7.0 2>&1 \
 // RUN:   | FileCheck -check-prefixes=SPECIFIED %s
 
+// RUN: %clang -v --rocm-path=%S --hip-version=3.7.0 2>&1 \
+// RUN:   -target amdgcn-amd-amdhsa \
+// RUN:   | FileCheck -check-prefixes=SPECIFIED %s
+
 // SPECIFIED: Found HIP installation: {{.*Driver}}, version 3.7.0
 
 // RUN: %clang -v --rocm-path=%S --hip-version=3.7 2>&1 \
diff --git a/clang/test/Driver/immediate-options.c b/clang/test/Driver/immediate-options.c
index 71494eec616f9..d7cd6be408016 100644
--- a/clang/test/Driver/immediate-options.c
+++ b/clang/test/Driver/immediate-options.c
@@ -9,10 +9,6 @@
 // RUN: %clang -dumpversion | FileCheck %s -check-prefix=DUMPVERSION
 // DUMPVERSION: {{[0-9]+\.[0-9.]+}}
 
-// RUN: %clang -print-search-dirs | FileCheck %s -check-prefix=PRINT-SEARCH-DIRS
-// PRINT-SEARCH-DIRS: programs: ={{.*}}
-// PRINT-SEARCH-DIRS: libraries: ={{.*}}
-
 // Test if the -print-resource-dir option is accepted without error.
 // Allow unspecified output because the value of CLANG_RESOURCE_DIR is unknown.
 // RUN: %clang -print-resource-dir | FileCheck %s -check-prefix=PRINT-RESOURCE-DIR
diff --git a/clang/test/Driver/msp430-toolchain.c b/clang/test/Driver/msp430-toolchain.c
index f741491c94e8c..1aee2f75a9d9d 100644
--- a/clang/test/Driver/msp430-toolchain.c
+++ b/clang/test/Driver/msp430-toolchain.c
@@ -1,78 +1,264 @@
-// A basic clang -cc1 command-line, and simple environment check.
-
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=CC1 %s
-// CC1: clang{{.*}} "-cc1" "-triple" "msp430"
-
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 \
-// RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430 %s
-
-// MSP430: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
-// MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtbegin.o"
-// MSP430: "--start-group" "-lmul_none" "-lgcc" "-lc" "-lcrt" "-lnosys" "--end-group"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtend.o"
-// MSP430: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crtn.o"
-
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nodefaultlibs \
-// RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-NO-DFT-LIB %s
-
-// MSP430-NO-DFT-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
-// MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430-NO-DFT-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtbegin.o"
-// MSP430-NO-DFT-LIB: "--start-group" "-lmul_none" "-lgcc" "--end-group"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430{{/|\\\\}}crtend.o"
-// MSP430-NO-DFT-LIB: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crtn.o"
-
-// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nostartfiles \
-// RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-NO-START %s
-
-// MSP430-NO-START: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
-// MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430-NO-START: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
-// MSP430-NO-START: "--start-group" "-lmul_none" "-lgcc" "-lc" "-lcrt" "-lnosys" "--end-group"
+// Splitting some tests into POS and NEG parts so the latter can validate
+// output fragments as large as possible for absence of some text.
+
+// Test for include paths and other cc1 flags
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -E \
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" 2>&1 \
+// RUN:   | FileCheck -check-prefix=INCLUDE-DIRS %s
+// INCLUDE-DIRS: "{{.*}}clang{{.*}}" "-cc1" "-triple" "msp430"
+// INCLUDE-DIRS: "-internal-isystem" "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}include"
+
+// Tests for passing flags to msp430-elf-ld (not file-related)
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" > %t 2>&1
+// RUN: FileCheck -check-prefix=DEFAULT-POS %s < %t
+// RUN: FileCheck -check-prefix=DEFAULT-NEG %s < %t
+// DEFAULT-POS: "{{.*}}msp430-elf-ld"
+// DEFAULT-POS: "--gc-sections"
+// DEFAULT-NEG-NOT: "--relax"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" \
+// RUN:   -r 2>&1 | FileCheck --check-prefixes=NO-GC-SECTIONS,RELOCATABLE-OBJECT %s
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" \
+// RUN:   -g 2>&1 | FileCheck -check-prefix=NO-GC-SECTIONS %s
+// NO-GC-SECTIONS: "{{.*}}msp430-elf-ld"
+// NO-GC-SECTIONS-NOT: "--gc-sections"
+// RELOCATABLE-OBJECT-NOT: crt0.o
+// RELOCATABLE-OBJECT-NOT: crtbegin
+// RELOCATABLE-OBJECT-NOT: crtend
+// RELOCATABLE-OBJECT-NOT: "-l{{.*}}"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" \
+// RUN:   -Wl,--some-linker-arg 2>&1 | FileCheck -check-prefix=WL-ARG %s
+// WL-ARG: "{{.*}}msp430-elf-ld"
+// WL-ARG: "--some-linker-arg"
+
+// Trivially mapped options: arbitrarily split into two disjoint groups
+// to check both "on"/present and "off"/absent state (when appropriate).
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" \
+// RUN:   -o /tmp/test.elf -r -t -z muldefs -mrelax > %t 2>&1
+// RUN: FileCheck -check-prefix=MISC-FLAGS-1-POS %s < %t
+// RUN: FileCheck -check-prefix=MISC-FLAGS-1-NEG %s < %t
+// MISC-FLAGS-1-POS: "{{.*}}msp430-elf-ld"
+// MISC-FLAGS-1-POS-DAG: "--relax"
+// MISC-FLAGS-1-POS-DAG: "-o" "/tmp/test.elf"
+// MISC-FLAGS-1-POS-DAG: "-r"
+// MISC-FLAGS-1-POS-DAG: "-t"
+// MISC-FLAGS-1-POS-DAG: "-z" "muldefs"
+// MISC-FLAGS-1-NEG: "{{.*}}msp430-elf-ld"
+// MISC-FLAGS-1-NEG-NOT: "-e{{.*}}"
+// MISC-FLAGS-1-NEG-NOT: "-s"
+// MISC-FLAGS-1-NEG-NOT: "-u"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 --sysroot="" \
+// RUN:   -e EntryPoint -s -u __undef > %t 2>&1
+// RUN: FileCheck -check-prefix=MISC-FLAGS-2-POS %s < %t
+// RUN: FileCheck -check-prefix=MISC-FLAGS-2-NEG %s < %t
+// MISC-FLAGS-2-POS: "{{.*}}msp430-elf-ld"
+// MISC-FLAGS-2-POS: "-e" "EntryPoint" "-s" "-u" "__undef"
+// MISC-FLAGS-2-NEG: "{{.*}}msp430-elf-ld"
+// MISC-FLAGS-2-NEG-NOT: "-r"
+// MISC-FLAGS-2-NEG-NOT: "-t"
+// MISC-FLAGS-2-NEG-NOT: "-z"
+// MISC-FLAGS-2-NEG-NOT: "--relax"
+
+// Tests for -nostdlib, -nostartfiles, -nodefaultfiles and -f(no-)exceptions
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc \
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-DEFAULT-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-DEFAULT-NEG %s < %t
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc \
+// RUN:   --gcc-toolchain="%S/Inputs/basic_msp430_tree" --sysroot="" 2>&1 \
+// RUN:   | FileCheck -check-prefix=LIBS-DEFAULT-GCC-TOOLCHAIN %s
+// LIBS-DEFAULT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-DEFAULT-POS: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// LIBS-DEFAULT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// LIBS-DEFAULT-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-DEFAULT-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-DEFAULT-POS: "-lgcc" "--start-group" "-lmul_none" "-lc" "-lgcc" "-lcrt" "-lnosys" "--end-group" "-lgcc"
+// LIBS-DEFAULT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "-lgcc"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "-lgcc" "--start-group" "-lmul_none" "-lc" "-lgcc" "-lcrt" "-lnosys" "--end-group" "-lgcc"
+// LIBS-DEFAULT-GCC-TOOLCHAIN: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "-lgcc"
+// LIBS-DEFAULT-NEG-NOT: crtbegin.o
+// LIBS-DEFAULT-NEG-NOT: -lssp_nonshared
+// LIBS-DEFAULT-NEG-NOT: -lssp
+// LIBS-DEFAULT-NEG-NOT: clang_rt
+// LIBS-DEFAULT-NEG-NOT: crtend.o
+// LIBS-DEFAULT-NEG-NOT: /exceptions
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=compiler-rt \
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-COMPILER-RT-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-COMPILER-RT-NEG %s < %t
+// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// LIBS-COMPILER-RT-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-COMPILER-RT-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-COMPILER-RT-POS: "{{[^"]*}}libclang_rt.builtins-msp430.a" "--start-group" "-lmul_none" "-lc" "{{[^"]*}}libclang_rt.builtins-msp430.a" "-lcrt" "-lnosys" "--end-group" "{{[^"]*}}libclang_rt.builtins-msp430.a"
+// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "{{[^"]*}}libclang_rt.builtins-msp430.a"
+// LIBS-COMPILER-RT-NEG-NOT: crtbegin.o
+// LIBS-COMPILER-RT-NEG-NOT: -lssp_nonshared
+// LIBS-COMPILER-RT-NEG-NOT: -lssp
+// LIBS-COMPILER-RT-NEG-NOT: -lgcc
+// LIBS-COMPILER-RT-NEG-NOT: crtend.o
+// LIBS-COMPILER-RT-NEG-NOT: /exceptions
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc -fexceptions \
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-EXC-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-EXC-NEG %s < %t
+// LIBS-EXC-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-EXC-POS: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430/exceptions{{/|\\\\}}crt0.o"
+// LIBS-EXC-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions{{/|\\\\}}crtbegin.o"
+// LIBS-EXC-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions"
+// LIBS-EXC-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430/exceptions"
+// LIBS-EXC-POS: "-lgcc" "--start-group" "-lmul_none" "-lc" "-lgcc" "-lcrt" "-lnosys" "--end-group"
+// LIBS-EXC-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430/exceptions{{/|\\\\}}crtend.o" "-lgcc"
+// LIBS-EXC-NEG-NOT: "{{.*}}/430"
+// LIBS-EXC-NEG-NOT: "{{.*}}430/crt{{.*}}"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc \
+// RUN:   -fstack-protector  --sysroot="%S/Inputs/basic_msp430_tree" 2>&1 \
+// RUN:   | FileCheck -check-prefix=LIBS-SSP %s
+// LIBS-SSP: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-SSP: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// LIBS-SSP: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// LIBS-SSP: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-SSP: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-SSP: "-lssp_nonshared" "-lssp"
+// LIBS-SSP: "-lgcc" "--start-group" "-lmul_none" "-lc" "-lgcc" "-lcrt" "-lnosys" "--end-group"
+// LIBS-SSP: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "-lgcc"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc -nodefaultlibs \
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-NO-DFT-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-NO-DFT-NEG %s < %t
+// LIBS-NO-DFT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-NO-DFT-POS: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// LIBS-NO-DFT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// LIBS-NO-DFT-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-NO-DFT-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-NO-DFT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "-lgcc"
+// LIBS-NO-DFT-NEG-NOT: "-lc"
+// LIBS-NO-DFT-NEG-NOT: "-lcrt"
+// LIBS-NO-DFT-NEG-NOT: "-lsim"
+// LIBS-NO-DFT-NEG-NOT: "-lnosys"
+// LIBS-NO-DFT-NEG-NOT: "--start-group"
+// LIBS-NO-DFT-NEG-NOT: "--end-group"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc -nolibc \
+// RUN:   -fstack-protector --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-NO-LIBC-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-NO-LIBC-NEG %s < %t
+// LIBS-NO-LIBC-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-NO-LIBC-POS: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// LIBS-NO-LIBC-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// LIBS-NO-LIBC-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-NO-LIBC-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-NO-LIBC-POS: "-lssp_nonshared" "-lssp" "-lgcc"
+// LIBS-NO-LIBC-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "-lgcc"
+// LIBS-NO-LIBC-NEG-NOT: "-lc"
+// LIBS-NO-LIBC-NEG-NOT: "-lcrt"
+// LIBS-NO-LIBC-NEG-NOT: "-lsim"
+// LIBS-NO-LIBC-NEG-NOT: "-lnosys"
+// LIBS-NO-LIBC-NEG-NOT: "--start-group"
+// LIBS-NO-LIBC-NEG-NOT: "--end-group"
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -rtlib=libgcc -nostartfiles \
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-NO-START-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-NO-START-NEG %s < %t
+// LIBS-NO-START-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-NO-START-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-NO-START-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-NO-START-POS: "-lgcc" "--start-group" "-lmul_none" "-lc" "-lgcc" "-lcrt" "-lnosys" "--end-group"
+// LIBS-NO-START-NEG-NOT: crt0.o
+// LIBS-NO-START-NEG-NOT: crtbegin
+// LIBS-NO-START-NEG-NOT: crtend
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -nostdlib \
-// RUN:   --gcc-toolchain=%S/Inputs/basic_msp430_tree --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-NO-STD-LIB %s
+// RUN:   --sysroot="%S/Inputs/basic_msp430_tree" > %t 2>&1
+// RUN: FileCheck -check-prefix=LIBS-NO-STD-POS %s < %t
+// RUN: FileCheck -check-prefix=LIBS-NO-STD-NEG %s < %t
+// LIBS-NO-STD-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LIBS-NO-STD-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// LIBS-NO-STD-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// LIBS-NO-STD-NEG-NOT: crt0.o
+// LIBS-NO-STD-NEG-NOT: crtbegin
+// LIBS-NO-STD-NEG-NOT: crtend
+// LIBS-NO-STD-NEG-NOT: "-lc"
+// LIBS-NO-STD-NEG-NOT: "-lcrt"
+// LIBS-NO-STD-NEG-NOT: "-lnosys"
+// LIBS-NO-STD-NEG-NOT: "--start-group"
+// LIBS-NO-STD-NEG-NOT: "--end-group"
+
+// Test for linker script autodiscovery
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430g2553 \
+// RUN:   --sysroot=%S/Inputs/basic_msp430_tree 2>&1 \
+// RUN:   | FileCheck -check-prefix=LD-SCRIPT %s
+// LD-SCRIPT: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// LD-SCRIPT: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}include"
+// LD-SCRIPT: "-Tmsp430g2553.ld"
 
-// MSP430-NO-STD-LIB: "{{.*}}Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
-// MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/430"
-// MSP430-NO-STD-LIB: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/7.3.1/../../..{{/|\\\\}}..{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
-// MSP430-NO-STD-LIB: "--start-group" "-lmul_none" "-lgcc" "--end-group"
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430g2553 \
+// RUN:   --sysroot=%S/Inputs/basic_msp430_tree \
+// RUN:   -T custom_script.ld 2>&1 \
+// RUN:   | FileCheck -check-prefix=CUSTOM-LD-SCRIPT %s
+// CUSTOM-LD-SCRIPT: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// CUSTOM-LD-SCRIPT-NOT: "-Tmsp430g2553.ld"
+// CUSTOM-LD-SCRIPT: "-T" "custom_script.ld"
+// CUSTOM-LD-SCRIPT-NOT: "-Tmsp430g2553.ld"
+
+// Test for compiling for simulator
+
+// RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430g2553 \
+// RUN:   -msim -rtlib=libgcc --sysroot=%S/Inputs/basic_msp430_tree > %t 2>&1
+// RUN: FileCheck -check-prefix=SIMULATOR-POS %s < %t
+// RUN: FileCheck -check-prefix=SIMULATOR-NEG %s < %t
+// SIMULATOR-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/../../..{{/|\\\\}}..{{/|\\\\}}bin{{/|\\\\}}msp430-elf-ld"
+// SIMULATOR-POS: "{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430{{/|\\\\}}crt0.o"
+// SIMULATOR-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o"
+// SIMULATOR-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430"
+// SIMULATOR-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430"
+// SIMULATOR-POS: "-lgcc" "--start-group" "-lmul_none" "-lc" "-lgcc" "-lcrt" "-lsim" "--undefined=__crt0_call_exit" "--end-group"
+// SIMULATOR-POS: "-Tmsp430-sim.ld"
+// SIMULATOR-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "-lgcc"
+// SIMULATOR-NEG-NOT: "-lnosys"
+
+// Tests for HWMult
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430f147 --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-16BIT %s
+// RUN:   | FileCheck -check-prefix=HWMult-16BIT %s
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430f147 -mhwmult=auto --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-16BIT %s
+// RUN:   | FileCheck -check-prefix=HWMult-16BIT %s
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mhwmult=16bit --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-16BIT %s
-
-// MSP430-HWMult-16BIT: "--start-group" "-lmul_16"
+// RUN:   | FileCheck -check-prefix=HWMult-16BIT %s
+// HWMult-16BIT: "--start-group" "-lmul_16"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430f4783 --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-32BIT %s
+// RUN:   | FileCheck -check-prefix=HWMult-32BIT %s
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mmcu=msp430f4783 -mhwmult=auto --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-32BIT %s
+// RUN:   | FileCheck -check-prefix=HWMult-32BIT %s
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mhwmult=32bit --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-32BIT %s
-
-// MSP430-HWMult-32BIT: "--start-group" "-lmul_32"
+// RUN:   | FileCheck -check-prefix=HWMult-32BIT %s
+// HWMult-32BIT: "--start-group" "-lmul_32"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mhwmult=f5series --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-F5 %s
-// MSP430-HWMult-F5: "--start-group" "-lmul_f5"
+// RUN:   | FileCheck -check-prefix=HWMult-F5 %s
+// HWMult-F5: "--start-group" "-lmul_f5"
 
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mhwmult=none --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-NONE %s
+// RUN:   | FileCheck -check-prefix=HWMult-NONE %s
 // RUN: %clang %s -### -no-canonical-prefixes -target msp430 -mhwmult=none -mmcu=msp430f4783 --sysroot="" 2>&1 \
-// RUN:   | FileCheck -check-prefix=MSP430-HWMult-NONE %s
-
-// MSP430-HWMult-NONE: "--start-group" "-lmul_none"
+// RUN:   | FileCheck -check-prefix=HWMult-NONE %s
+// HWMult-NONE: "--start-group" "-lmul_none"
diff --git a/clang/test/Driver/prefixed-tools.c b/clang/test/Driver/prefixed-tools.c
index 63f7f29ae9635..0252a2f701436 100644
--- a/clang/test/Driver/prefixed-tools.c
+++ b/clang/test/Driver/prefixed-tools.c
@@ -1,8 +1,8 @@
-// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree -o %t.o -no-integrated-as -fuse-ld=ld \
+// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree/x86_64--linux- -o %t.o -no-integrated-as -fuse-ld=ld \
 // RUN:        -target x86_64--linux %s 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-M64 %s
 
-// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree -o %t.o -no-integrated-as -fuse-ld=ld \
+// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree/x86_64--linux- -o %t.o -no-integrated-as -fuse-ld=ld \
 // RUN:        -m32 -target x86_64--linux %s 2>&1 | \
 // RUN: FileCheck --check-prefix=CHECK-M32 %s
 
diff --git a/clang/test/Driver/print-search-dirs.c b/clang/test/Driver/print-search-dirs.c
new file mode 100644
index 0000000000000..0ac13125c9a13
--- /dev/null
+++ b/clang/test/Driver/print-search-dirs.c
@@ -0,0 +1,6 @@
+// UNSUPPORTED: system-windows
+
+// RUN: env COMPILER_PATH=cpath1:cpath2 %clang %s -target x86_64-pc-freebsd --sysroot=%S/Inputs/basic_freebsd64_tree \
+// RUN:   -B b1 -B b2 -print-search-dirs | FileCheck %s
+// CHECK:      programs: =b1:b2:cpath1:cpath2:{{.*}}
+// CHECK-NEXT: libraries: ={{.*}}Inputs/basic_freebsd64_tree/usr/lib
diff --git a/clang/test/Driver/program-path-priority.c b/clang/test/Driver/program-path-priority.c
index ba893e7e2e2cd..cba5f9f4d7430 100644
--- a/clang/test/Driver/program-path-priority.c
+++ b/clang/test/Driver/program-path-priority.c
@@ -13,6 +13,11 @@
 /// so only name priority is accounted for, unless we fail to find
 /// anything at all in the prefix.
 
+/// Note: All matches are expected to be at the end of file paths.
+/// So we match " on the end to account for build systems that
+/// put the name of the compiler in the build path.
+/// E.g. /build/gcc_X.Y.Z/0/...
+
 /// Symlink clang to a new dir which will be its
 /// "program path" for these tests
 // RUN: rm -rf %t && mkdir -p %t
@@ -21,14 +26,18 @@
 /// No gccs at all, nothing is found
 // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=NO_NOTREAL_GCC %s
-// NO_NOTREAL_GCC-NOT: notreal-none-elf-gcc
-// NO_NOTREAL_GCC-NOT: /gcc
+// NO_NOTREAL_GCC-NOT: notreal-none-elf-gcc"
+/// Some systems will have "gcc-x.y.z" so for this first check
+/// make sure we don't find "gcc" or "gcc-x.y.z". If we do find either
+/// then there is no point continuing as this copy of clang is not
+/// isolated as we expected.
+// NO_NOTREAL_GCC-NOT: {{/gcc[^/]*"}}
 
 /// -gcc in program path is found
 // RUN: touch %t/notreal-none-elf-gcc && chmod +x %t/notreal-none-elf-gcc
 // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=PROG_PATH_NOTREAL_GCC %s
-// PROG_PATH_NOTREAL_GCC: notreal-none-elf-gcc
+// PROG_PATH_NOTREAL_GCC: notreal-none-elf-gcc"
 
 /// -gcc on the PATH is found
 // RUN: mkdir -p %t/env
@@ -36,74 +45,89 @@
 // RUN: touch %t/env/notreal-none-elf-gcc && chmod +x %t/env/notreal-none-elf-gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=ENV_PATH_NOTREAL_GCC %s
-// ENV_PATH_NOTREAL_GCC: env/notreal-none-elf-gcc
+// ENV_PATH_NOTREAL_GCC: env/notreal-none-elf-gcc"
 
 /// -gcc in program path is preferred to one on the PATH
 // RUN: touch %t/notreal-none-elf-gcc && chmod +x %t/notreal-none-elf-gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=BOTH_NOTREAL_GCC %s
-// BOTH_NOTREAL_GCC: notreal-none-elf-gcc
-// BOTH_NOTREAL_GCC-NOT: env/notreal-none-elf-gcc
+// BOTH_NOTREAL_GCC: notreal-none-elf-gcc"
+// BOTH_NOTREAL_GCC-NOT: env/notreal-none-elf-gcc"
 
 /// On program path, -gcc is preferred to plain gcc
 // RUN: touch %t/gcc && chmod +x %t/gcc
 // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=NOTREAL_GCC_PREFERRED %s
-// NOTREAL_GCC_PREFERRED: notreal-none-elf-gcc
-// NOTREAL_GCC_PREFERRED-NOT: /gcc
+// NOTREAL_GCC_PREFERRED: notreal-none-elf-gcc"
+// NOTREAL_GCC_PREFERRED-NOT: /gcc"
 
 /// -gcc on the PATH is preferred to gcc in program path
 // RUN: rm %t/notreal-none-elf-gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=NOTREAL_PATH_OVER_GCC_PROG %s
-// NOTREAL_PATH_OVER_GCC_PROG: env/notreal-none-elf-gcc
-// NOTREAL_PATH_OVER_GCC_PROG-NOT: /gcc
+// NOTREAL_PATH_OVER_GCC_PROG: env/notreal-none-elf-gcc"
+// NOTREAL_PATH_OVER_GCC_PROG-NOT: /gcc"
 
 /// -gcc on the PATH is preferred to gcc on the PATH
 // RUN: rm %t/gcc
 // RUN: touch %t/env/gcc && chmod +x %t/env/gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=NOTREAL_PATH_OVER_GCC_PATH %s
-// NOTREAL_PATH_OVER_GCC_PATH: env/notreal-none-elf-gcc
-// NOTREAL_PATH_OVER_GCC_PATH-NOT: /gcc
+// NOTREAL_PATH_OVER_GCC_PATH: env/notreal-none-elf-gcc"
+// NOTREAL_PATH_OVER_GCC_PATH-NOT: /gcc"
+
+/// We cannot trust clang --version, or cmake's LLVM_DEFAULT_TARGET_TRIPLE
+/// to give us the one and only default triple.
+/// Can't trust cmake because on Darwin, triples have a version appended to them.
+/// (and clang uses the versioned string to search)
+/// Can't trust --version because it will pad 3 item triples to 4 e.g.
+/// powerpc64le-linux-gnu -> powerpc64le-unknown-linux-gnu
+/// (and clang uses the former to search)
+/// So we write to both names which is a bit odd but still proves that the
+/// lookup is working.
 
 /// -gcc has lowest priority so -gcc
 /// on PATH beats default triple in program path
-/// Darwin triples have a version appended to them, even if set via
-/// LLVM_DEFAULT_TARGET_TRIPLE. So the only way to know for sure is to ask clang.
 // RUN: DEFAULT_TRIPLE=`%t/clang --version | grep "Target:" | cut -d ' ' -f2`
 // RUN: touch %t/$DEFAULT_TRIPLE-gcc && chmod +x %t/$DEFAULT_TRIPLE-gcc
+// RUN: touch %t/%target_triple-gcc && chmod +x %t/%target_triple-gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=DEFAULT_TRIPLE_GCC %s
-// DEFAULT_TRIPLE_GCC: env/notreal-none-elf-gcc
+// DEFAULT_TRIPLE_GCC: env/notreal-none-elf-gcc"
 
 /// plain gcc on PATH beats default triple in program path
 // RUN: rm %t/env/notreal-none-elf-gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=DEFAULT_TRIPLE_NO_NOTREAL %s
-// DEFAULT_TRIPLE_NO_NOTREAL: env/gcc
-// DEFAULT_TRIPLE_NO_NOTREAL-NOT: -gcc
+// DEFAULT_TRIPLE_NO_NOTREAL: env/gcc"
+// DEFAULT_TRIPLE_NO_NOTREAL-NOT: -gcc"
 
 /// default triple only chosen when no others are present
 // RUN: rm %t/env/gcc
 // RUN: env "PATH=%t/env/" %t/clang -### -target notreal-none-elf %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=DEFAULT_TRIPLE_NO_OTHERS %s
-// DEFAULT_TRIPLE_NO_OTHERS: -gcc
-// DEFAULT_TRIPLE_NO_OTHERS-NOT: notreal-none-elf-gcc
-// DEFAULT_TRIPLE_NO_OTHERS-NOT: /gcc
+// DEFAULT_TRIPLE_NO_OTHERS: -gcc"
+// DEFAULT_TRIPLE_NO_OTHERS-NOT: notreal-none-elf-gcc"
+// DEFAULT_TRIPLE_NO_OTHERS-NOT: /gcc"
 
 /// -B paths are searched separately so default triple will win
 /// if put in one of those even if other paths have higher priority names
 // RUN: mkdir -p %t/prefix
-// RUN: mv %t/$DEFAULT_TRIPLE-gcc %t/prefix
+/// One of these will fail when $DEFAULT_TRIPLE == %target_triple
+// RUN: test -f %t/$DEFAULT_TRIPLE-gcc && \
+// RUN:   mv %t/$DEFAULT_TRIPLE-gcc %t/prefix || true
+// RUN: test -f %t/%target_triple-gcc && \
+// RUN:   mv %t/%target_triple-gcc %t/prefix || true
 // RUN: touch %t/notreal-none-elf-gcc && chmod +x %t/notreal-none-elf-gcc
+// RUN: touch %t/prefix/gcc && chmod +x %t/prefix/gcc
 // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s -B %t/prefix 2>&1 | \
 // RUN:   FileCheck --check-prefix=DEFAULT_TRIPLE_IN_PREFIX %s
-// DEFAULT_TRIPLE_IN_PREFIX: prefix/{{.*}}-gcc
-// DEFAULT_TRIPLE_IN_PREFIX-NOT: notreal-none-elf-gcc
+// DEFAULT_TRIPLE_IN_PREFIX: prefix/gcc"
+// DEFAULT_TRIPLE_IN_PREFIX-NOT: notreal-none-elf-gcc"
 
 /// Only if there is nothing in the prefix will we search other paths
-// RUN: rm %t/prefix/$DEFAULT_TRIPLE-gcc
+/// -f in case $DEFAULT_TRIPLE == %target_triple
+// RUN: rm -f %t/prefix/$DEFAULT_TRIPLE-gcc %t/prefix/%target_triple-gcc %t/prefix/gcc
 // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s -B %t/prefix 2>&1 | \
 // RUN:   FileCheck --check-prefix=EMPTY_PREFIX_DIR %s
-// EMPTY_PREFIX_DIR: notreal-none-elf-gcc
+// EMPTY_PREFIX_DIR: notreal-none-elf-gcc"
diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index e3062feb7dee0..725201a77ba7c 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -156,9 +156,9 @@
 // RV32-LOWER: error: invalid arch name 'rv32imC',
 // RV32-LOWER: string must be lowercase
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32 -### %s \
+// RUN: %clang -target riscv32-unknown-elf -march=unknown -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-STR %s
-// RV32-STR: error: invalid arch name 'rv32',
+// RV32-STR: error: invalid arch name 'unknown',
 // RV32-STR: string must begin with rv32{i,e,g} or rv64{i,g}
 
 // RUN: %clang -target riscv32-unknown-elf -march=rv32q -### %s \
@@ -361,6 +361,10 @@
 // RV32-EXPERIMENTAL-ZBB-ZBP: "-target-feature" "+experimental-zbb"
 // RV32-EXPERIMENTAL-ZBB-ZBP: "-target-feature" "+experimental-zbp"
 
+// RUN: %clang -target riscv32-unknown-elf -march=rv32izbb0p92zbp0p92 -menable-experimental-extensions -### %s \
+// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-EXPERIMENTAL-ZBB-ZBP-UNDERSCORE %s
+// RV32-EXPERIMENTAL-ZBB-ZBP-UNDERSCORE: error: invalid arch name 'rv32izbb0p92zbp0p92', multi-character extensions must be separated by underscores
+
 // RUN: %clang -target riscv32-unknown-elf -march=rv32iv -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-NOFLAG %s
 // RV32-EXPERIMENTAL-V-NOFLAG: error: invalid arch name 'rv32iv'
diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c
new file mode 100644
index 0000000000000..c6281a0b64335
--- /dev/null
+++ b/clang/test/Driver/riscv-cpus.c
@@ -0,0 +1,38 @@
+// Check target CPUs are correctly passed.
+
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=rocket-rv32 | FileCheck -check-prefix=MCPU-ROCKETCHIP32 %s
+// MCPU-ROCKETCHIP32: "-nostdsysteminc" "-target-cpu" "rocket-rv32"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=rocket-rv64 | FileCheck -check-prefix=MCPU-ROCKETCHIP64 %s
+// MCPU-ROCKETCHIP64: "-nostdsysteminc" "-target-cpu" "rocket-rv64"
+// MCPU-ROCKETCHIP64: "-target-feature" "+64bit"
+
+// mcpu with default march
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-u54 | FileCheck -check-prefix=MCPU-SIFIVE-U54 %s
+// MCPU-SIFIVE-U54: "-nostdsysteminc" "-target-cpu" "sifive-u54"
+// MCPU-SIFIVE-U54: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d"
+// MCPU-SIFIVE-U54: "-target-feature" "+c" "-target-feature" "+64bit"
+// MCPU-SIFIVE-U54: "-target-abi" "lp64d"
+
+// mcpu with mabi option
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=sifive-u54 -mabi=lp64 | FileCheck -check-prefix=MCPU-ABI-SIFIVE-U54 %s
+// MCPU-ABI-SIFIVE-U54: "-nostdsysteminc" "-target-cpu" "sifive-u54"
+// MCPU-ABI-SIFIVE-U54: "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d"
+// MCPU-ABI-SIFIVE-U54: "-target-feature" "+c" "-target-feature" "+64bit"
+// MCPU-ABI-SIFIVE-U54: "-target-abi" "lp64"
+
+// march overwrites mcpu's default march
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=sifive-e31 -march=rv32imc | FileCheck -check-prefix=MCPU-MARCH %s
+// MCPU-MARCH: "-nostdsysteminc" "-target-cpu" "sifive-e31" "-target-feature" "+m" "-target-feature" "+c"
+// MCPU-MARCH: "-target-abi" "ilp32"
+
+// Check failed cases
+
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=generic-rv321 | FileCheck -check-prefix=FAIL-MCPU-NAME %s
+// FAIL-MCPU-NAME: error: the clang compiler does not support '-mcpu=generic-rv321'
+
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=generic-rv32 -march=rv64i | FileCheck -check-prefix=MISMATCH-ARCH %s
+// MISMATCH-ARCH: error: the clang compiler does not support '-mcpu=generic-rv32'
+
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=generic-rv64 | FileCheck -check-prefix=MISMATCH-MCPU %s
+// MISMATCH-MCPU: error: the clang compiler does not support '-mcpu=generic-rv64'
diff --git a/clang/test/Driver/windows-cross.c b/clang/test/Driver/windows-cross.c
index d96b0283a90ec..96497da61c0d5 100644
--- a/clang/test/Driver/windows-cross.c
+++ b/clang/test/Driver/windows-cross.c
@@ -1,47 +1,47 @@
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -stdlib=libstdc++ -rtlib=compiler-rt -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-BASIC-LIBCXX
 
-// CHECK-BASIC-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt"
+// CHECK-BASIC-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -stdlib=libstdc++ -rtlib=compiler-rt -static -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-STATIC
 
-// CHECK-STATIC: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bstatic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt"
+// CHECK-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bstatic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libstdc++ -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-RTLIB
 
-// CHECK-RTLIB: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+// CHECK-RTLIB: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-C-LIBCXX
 
-// CHECK-C-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+// CHECK-C-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
 // RUN: %clangxx -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-LIBCXX
 
-// CHECK-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+// CHECK-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -o shared.dll %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-SHARED
 
-// CHECK-SHARED: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+// CHECK-SHARED: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -static -o shared.dll %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-SHARED-STATIC
 
-// CHECK-SHARED-STATIC: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+// CHECK-SHARED-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -o shared.dll %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-NOSTARTFILES
 
-// CHECK-NOSTARTFILES: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
+// CHECK-NOSTARTFILES: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -nodefaultlibs -o shared.dll %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-STANDALONE
 
-// CHECK-STANDALONE: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o"
+// CHECK-STANDALONE: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o"
 
 // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -shared -o shared.dll -x c++ %s 2>&1 \
 // RUN:    | FileCheck %s --check-prefix CHECK-FUSE-LD
diff --git a/clang/test/Frontend/embed-bitcode.ll b/clang/test/Frontend/embed-bitcode.ll
index bd2afb44bb0fd..75cdc5f657fc0 100644
--- a/clang/test/Frontend/embed-bitcode.ll
+++ b/clang/test/Frontend/embed-bitcode.ll
@@ -37,6 +37,11 @@
 ; CHECK: @llvm.cmdline = private constant
 ; CHECK: section "__LLVM,__cmdline"
 
+; check warning options are not embedded
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=all -x ir %s -o - -Wall -Wundef-prefix=TEST \
+; RUN:    | FileCheck %s -check-prefix=CHECK-WARNING
+
 ; CHECK-ELF: @llvm.embedded.module
 ; CHECK-ELF: section ".llvmbc"
 ; CHECK-ELF: @llvm.cmdline
@@ -54,6 +59,9 @@
 ; CHECK-MARKER: @llvm.cmdline
 ; CHECK-MARKER: section "__LLVM,__cmdline"
 
+; CHECK-WARNING-NOT: Wall
+; CHECK-WARNING-NOT: Wundef-prefix
+
 define i32 @f0() {
   ret i32 0
 }
diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 0e212592dd2ba..6e3e8bffbd24e 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -11,12 +11,34 @@
 #include 
 #endif
 
-// CHECK-DAG: define weak {{.*}} @__mulsc3
-// CHECK-DAG: define weak {{.*}} @__muldc3
-// CHECK-DAG: define weak {{.*}} @__divsc3
-// CHECK-DAG: define weak {{.*}} @__divdc3
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call float @__nv_copysignf(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call i32 @__nv_isfinited(
+// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call double @__nv_scalbn(
+// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @__nv_logb(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
 // CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
+
 void test_scmplx(float _Complex a) {
 #pragma omp target
   {
@@ -24,7 +46,6 @@ void test_scmplx(float _Complex a) {
   }
 }
 
-// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(double _Complex a) {
 #pragma omp target
   {
diff --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp
index 58ed24b74b0e4..e4b78deb05d7b 100644
--- a/clang/test/Headers/nvptx_device_math_complex.cpp
+++ b/clang/test/Headers/nvptx_device_math_complex.cpp
@@ -5,12 +5,34 @@
 
 #include 
 
-// CHECK-DAG: define weak {{.*}} @__mulsc3
-// CHECK-DAG: define weak {{.*}} @__muldc3
-// CHECK-DAG: define weak {{.*}} @__divsc3
-// CHECK-DAG: define weak {{.*}} @__divdc3
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call float @__nv_copysignf(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call i32 @__nv_isfinited(
+// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call double @__nv_scalbn(
+// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @__nv_logb(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
 // CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
+
 void test_scmplx(std::complex a) {
 #pragma omp target
   {
@@ -18,7 +40,6 @@ void test_scmplx(std::complex a) {
   }
 }
 
-// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(std::complex a) {
 #pragma omp target
   {
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 5c571fb458ec5..3a376a7caab46 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -156,3 +156,10 @@
 // AVR-SAME: ttiny4, attiny5, attiny9, attiny10, attiny20, attiny40, attiny102,
 // AVR-SAME: attiny104
 
+// RUN: not %clang_cc1 -triple riscv32 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV32
+// RISCV32: error: unknown target CPU 'not-a-cpu'
+// RISCV32: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e31
+
+// RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64
+// RISCV64: error: unknown target CPU 'not-a-cpu'
+// RISCV64: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-u54
diff --git a/clang/test/OpenMP/atomic_codegen.cpp b/clang/test/OpenMP/atomic_codegen.cpp
index 4377213e6d824..47371fe912aa3 100644
--- a/clang/test/OpenMP/atomic_codegen.cpp
+++ b/clang/test/OpenMP/atomic_codegen.cpp
@@ -82,7 +82,7 @@ void parallel_atomic_ewc() {
     }
 }
 
-int &foo() { return a; }
+int &foo() { extern void mayThrow(); mayThrow(); return a; }
 
 // TERM_DEBUG-LABEL: parallel_atomic
 void parallel_atomic() {
diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp
index f84a26380df9e..35b2ed7212761 100644
--- a/clang/test/OpenMP/barrier_codegen.cpp
+++ b/clang/test/OpenMP/barrier_codegen.cpp
@@ -46,7 +46,7 @@ int main(int argc, char **argv) {
 // IRBUILDER:          ; Function Attrs: nounwind
 // IRBUILDER-NEXT:     declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #
 // IRBUILDER_OPT:      ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-// IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #
+// IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) #
 
 // CHECK: define {{.+}} [[TMAIN_INT]](
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* [[LOC]])
diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index b7d1cea56721f..a21a9db1e39a8 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -175,7 +175,7 @@ for (int i = 0; i < argc; ++i) {
 
 // IRBUILDER: define internal void @main
 
-// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
+// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]]
 // IRBUILDER-NEXT: ret void
 // IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
 
@@ -192,10 +192,8 @@ for (int i = 0; i < argc; ++i) {
 // IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
 // IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
 // IRBUILDER: [[EXIT]]
-// IRBUILDER: br label %[[EXIT2:.+]]
-// IRBUILDER: [[CONTINUE]]
-// IRBUILDER: br label %[[ELSE:.+]]
-// IRBUILDER: [[EXIT2]]
 // IRBUILDER: br label %[[RETURN]]
+// IRBUILDER: [[CONTINUE]]
+// IRBUILDER: br label %[[ELSE2:.+]]
 
 #endif
diff --git a/clang/test/OpenMP/capturing_in_templates.cpp b/clang/test/OpenMP/capturing_in_templates.cpp
index 97a935214bdb8..53a711da11137 100644
--- a/clang/test/OpenMP/capturing_in_templates.cpp
+++ b/clang/test/OpenMP/capturing_in_templates.cpp
@@ -18,7 +18,7 @@ pair make_pair(T1 &&t1, T2 &&t2) {
 
 // CHECK-LABEL: @main
 int main(int argc, char **argv) {
-// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i64* null)
+// CHECK: call i32 @__tgt_target_mapper(i64 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null)
 #pragma omp target
  {
     for (int i = 0; i < 64; ++i) {
diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp
index f49c9cc9c21a4..4b2566bbf364b 100644
--- a/clang/test/OpenMP/critical_codegen.cpp
+++ b/clang/test/OpenMP/critical_codegen.cpp
@@ -22,7 +22,7 @@
 
 // ALL:       define {{.*}}void [[FOO:@.+]]()
 
-void foo() {}
+void foo() { extern void mayThrow(); mayThrow(); }
 
 // ALL-LABEL: @main
 // TERM_DEBUG-LABEL: @main
diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp
index f2ed4d2b9c487..ee64fe2099007 100644
--- a/clang/test/OpenMP/declare_mapper_codegen.cpp
+++ b/clang/test/OpenMP/declare_mapper_codegen.cpp
@@ -20,17 +20,42 @@
 // RUN: %clang_cc1 -DCK0 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 
 #ifdef CK0
+// Mapper function code generation and runtime interface.
 
 // CK0-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}.region_id = weak constant i8 0
 // CK0-64: [[SIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
 // CK0-32: [[SIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
 // CK0: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 35]
+// CK0-64: [[NWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[NWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[NWTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 35]
+// CK0-64: [[TEAMSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[TEAMSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[TEAMTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 33]
+// CK0-64: [[TEAMNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[TEAMNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[TEAMNWTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 33]
+// CK0-64: [[EDSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[EDSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[EDTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 33]
+// CK0-64: [[EDNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[EDNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[EDNWTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 33]
+// CK0-64: [[EXDSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[EXDSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[EXDTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 34]
+// CK0-64: [[EXDNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[EXDNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[EXDNWTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 34]
 // CK0-64: [[TSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
 // CK0-32: [[TSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
 // CK0: [[TTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 // CK0-64: [[FSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
 // CK0-32: [[FSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
 // CK0: [[FTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 34]
+// CK0-64: [[FNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16]
+// CK0-32: [[FNWSIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8]
+// CK0: [[FNWTYPES:@.+]] = {{.+}}constant [1 x i64] [i64 34]
 
 class C {
 public:
@@ -40,13 +65,15 @@ class C {
 
 #pragma omp declare mapper(id: C s) map(s.a, s.b[0:2])
 
-// CK0-LABEL: define {{.*}}void @.omp_mapper.{{.*}}C.id{{.*}}(i8*{{.*}}, i8*{{.*}}, i8*{{.*}}, i64{{.*}}, i64{{.*}})
+// CK0: define {{.*}}void [[MPRFUNC:@[.]omp_mapper[.].*C[.]id]](i8*{{.*}}, i8*{{.*}}, i8*{{.*}}, i64{{.*}}, i64{{.*}})
 // CK0: store i8* %{{[^,]+}}, i8** [[HANDLEADDR:%[^,]+]]
 // CK0: store i8* %{{[^,]+}}, i8** [[BPTRADDR:%[^,]+]]
 // CK0: store i8* %{{[^,]+}}, i8** [[VPTRADDR:%[^,]+]]
 // CK0: store i64 %{{[^,]+}}, i{{64|32}}* [[SIZEADDR:%[^,]+]]
 // CK0: store i64 %{{[^,]+}}, i64* [[TYPEADDR:%[^,]+]]
-// CK0-DAG: [[SIZE:%.+]] = load i64, i64* [[SIZEADDR]]
+// CK0-DAG: [[BYTESIZE:%.+]] = load i64, i64* [[SIZEADDR]]
+// CK0-64-DAG: [[SIZE:%.+]] = udiv exact i64 [[BYTESIZE]], 16
+// CK0-32-DAG: [[SIZE:%.+]] = udiv exact i64 [[BYTESIZE]], 8
 // CK0-DAG: [[TYPE:%.+]] = load i64, i64* [[TYPEADDR]]
 // CK0-DAG: [[HANDLE:%.+]] = load i8*, i8** [[HANDLEADDR]]
 // CK0-DAG: [[PTRBEGIN:%.+]] = bitcast i8** [[VPTRADDR]] to %class.C**
@@ -209,42 +236,175 @@ void foo(int a){
   C c;
   c.a = a;
 
-  // CK0-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK0-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
   // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
   // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
   // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
   // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
   // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
   // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
   // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
   // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]])
   #pragma omp target map(mapper(id),tofrom: c)
   {
-   ++c.a;
+    ++c.a;
   }
 
-  // CK0-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[TGEPBP:%.+]], i8** [[TGEPP:%.+]], i64* getelementptr {{.+}}[1 x i64]* [[TSIZES]], i32 0, i32 0), {{.+}}getelementptr {{.+}}[1 x i64]* [[TTYPES]]{{.+}})
+  // CK0-DAG: call i32 @__tgt_target_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[NWSIZES]]{{.+}}, {{.+}}[[NWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]])
+  #pragma omp target map(mapper(id),tofrom: c) nowait
+  {
+    ++c.a;
+  }
+
+  // CK0-DAG: call i32 @__tgt_target_teams_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[TEAMSIZES]]{{.+}}, {{.+}}[[TEAMTYPES]]{{.+}}, i8** [[MPRGEP:%.+]], i32 0, i32 0)
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]])
+  #pragma omp target teams map(mapper(id),to: c)
+  {
+    ++c.a;
+  }
+
+  // CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[TEAMNWSIZES]]{{.+}}, {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]], i32 0, i32 0)
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]])
+  #pragma omp target teams map(mapper(id),to: c) nowait
+  {
+    ++c.a;
+  }
+
+  // CK0-DAG: call void @__tgt_target_data_begin_mapper(i64 {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[EDSIZES]]{{.+}}, {{.+}}[[EDTYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  #pragma omp target enter data map(mapper(id),to: c)
+
+  // CK0-DAG: call void @__tgt_target_data_begin_nowait_mapper(i64 {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[EDNWSIZES]]{{.+}}, {{.+}}[[EDNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  #pragma omp target enter data map(mapper(id),to: c) nowait
+
+  // CK0-DAG: call void @__tgt_target_data_end_mapper(i64 {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[EXDSIZES]]{{.+}}, {{.+}}[[EXDTYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  #pragma omp target exit data map(mapper(id),from: c)
+
+  // CK0-DAG: call void @__tgt_target_data_end_nowait_mapper(i64 {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[EXDNWSIZES]]{{.+}}, {{.+}}[[EXDNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
+  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+  #pragma omp target exit data map(mapper(id),from: c) nowait
+
+  // CK0-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[TGEPBP:%.+]], i8** [[TGEPP:%.+]], i64* getelementptr {{.+}}[1 x i64]* [[TSIZES]], i32 0, i32 0), {{.+}}getelementptr {{.+}}[1 x i64]* [[TTYPES]]{{.+}}, i8** [[TMPRGEP:%.+]])
   // CK0-DAG: [[TGEPBP]] = getelementptr inbounds {{.+}}[[TBP:%[^,]+]], i{{.+}} 0, i{{.+}} 0
   // CK0-DAG: [[TGEPP]] = getelementptr inbounds {{.+}}[[TP:%[^,]+]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[TMPRGEP]] = bitcast [1 x i8*]* [[TMPR:%[^,]+]] to i8**
   // CK0-DAG: [[TBP0:%.+]] = getelementptr inbounds {{.+}}[[TBP]], i{{.+}} 0, i{{.+}} 0
   // CK0-DAG: [[TP0:%.+]] = getelementptr inbounds {{.+}}[[TP]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[TMPR1:%.+]] = getelementptr inbounds {{.+}}[[TMPR]], i[[sz]] 0, i[[sz]] 0
   // CK0-DAG: [[TCBP0:%.+]] = bitcast i8** [[TBP0]] to %class.C**
   // CK0-DAG: [[TCP0:%.+]] = bitcast i8** [[TP0]] to %class.C**
   // CK0-DAG: store %class.C* [[VAL]], %class.C** [[TCBP0]]
   // CK0-DAG: store %class.C* [[VAL]], %class.C** [[TCP0]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[TMPR1]]
   #pragma omp target update to(mapper(id): c)
 
-  // CK0-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[FGEPBP:%.+]], i8** [[FGEPP:%.+]], i64* getelementptr {{.+}}[1 x i64]* [[FSIZES]], i32 0, i32 0), {{.+}}getelementptr {{.+}}[1 x i64]* [[FTYPES]]{{.+}})
+  // CK0-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[FGEPBP:%.+]], i8** [[FGEPP:%.+]], i64* getelementptr {{.+}}[1 x i64]* [[FSIZES]], i32 0, i32 0), {{.+}}getelementptr {{.+}}[1 x i64]* [[FTYPES]]{{.+}}, i8** [[FMPRGEP:%.+]])
   // CK0-DAG: [[FGEPBP]] = getelementptr inbounds {{.+}}[[FBP:%[^,]+]], i{{.+}} 0, i{{.+}} 0
   // CK0-DAG: [[FGEPP]] = getelementptr inbounds {{.+}}[[FP:%[^,]+]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[FMPRGEP]] = bitcast [1 x i8*]* [[FMPR:%[^,]+]] to i8**
   // CK0-DAG: [[FBP0:%.+]] = getelementptr inbounds {{.+}}[[FBP]], i{{.+}} 0, i{{.+}} 0
   // CK0-DAG: [[FP0:%.+]] = getelementptr inbounds {{.+}}[[FP]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[FMPR1:%.+]] = getelementptr inbounds {{.+}}[[FMPR]], i[[sz]] 0, i[[sz]] 0
   // CK0-DAG: [[FCBP0:%.+]] = bitcast i8** [[FBP0]] to %class.C**
   // CK0-DAG: [[FCP0:%.+]] = bitcast i8** [[FP0]] to %class.C**
   // CK0-DAG: store %class.C* [[VAL]], %class.C** [[FCBP0]]
   // CK0-DAG: store %class.C* [[VAL]], %class.C** [[FCP0]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[FMPR1]]
   #pragma omp target update from(mapper(id): c)
+
+  // CK0-DAG: call void @__tgt_target_data_update_nowait_mapper(i64 -1, i32 1, i8** [[FGEPBP:%.+]], i8** [[FGEPP:%.+]], i64* getelementptr {{.+}}[1 x i64]* [[FNWSIZES]], i32 0, i32 0), {{.+}}getelementptr {{.+}}[1 x i64]* [[FNWTYPES]]{{.+}}, i8** [[FMPRGEP:%.+]])
+  // CK0-DAG: [[FGEPBP]] = getelementptr inbounds {{.+}}[[FBP:%[^,]+]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[FGEPP]] = getelementptr inbounds {{.+}}[[FP:%[^,]+]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[FMPRGEP]] = bitcast [1 x i8*]* [[FMPR:%[^,]+]] to i8**
+  // CK0-DAG: [[FBP0:%.+]] = getelementptr inbounds {{.+}}[[FBP]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[FP0:%.+]] = getelementptr inbounds {{.+}}[[FP]], i{{.+}} 0, i{{.+}} 0
+  // CK0-DAG: [[FMPR1:%.+]] = getelementptr inbounds {{.+}}[[FMPR]], i[[sz]] 0, i[[sz]] 0
+  // CK0-DAG: [[FCBP0:%.+]] = bitcast i8** [[FBP0]] to %class.C**
+  // CK0-DAG: [[FCP0:%.+]] = bitcast i8** [[FP0]] to %class.C**
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[FCBP0]]
+  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[FCP0]]
+  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[FMPR1]]
+  #pragma omp target update from(mapper(id): c) nowait
 }
 
 
@@ -257,7 +417,7 @@ void foo(int a){
 // CK0: {{.+}} = add nsw i32 [[VAL]], 1
 // CK0: }
 
-#endif
+#endif // CK0
 
 
 ///==========================================================================///
@@ -276,6 +436,7 @@ void foo(int a){
 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 
 #ifdef CK1
+// C++ template
 
 template 
 class C {
@@ -291,7 +452,8 @@ class C {
 // CK1: store i8* %{{[^,]+}}, i8** [[VPTRADDR:%[^,]+]]
 // CK1: store i64 %{{[^,]+}}, i{{64|32}}* [[SIZEADDR:%[^,]+]]
 // CK1: store i64 %{{[^,]+}}, i64* [[TYPEADDR:%[^,]+]]
-// CK1-DAG: [[SIZE:%.+]] = load i64, i64* [[SIZEADDR]]
+// CK1-DAG: [[BYTESIZE:%.+]] = load i64, i64* [[SIZEADDR]]
+// CK1-DAG: [[SIZE:%.+]] = udiv exact i64 [[BYTESIZE]], 4
 // CK1-DAG: [[TYPE:%.+]] = load i64, i64* [[TYPEADDR]]
 // CK1-DAG: [[HANDLE:%.+]] = load i8*, i8** [[HANDLEADDR]]
 // CK1-DAG: [[PTRBEGIN:%.+]] = bitcast i8** [[VPTRADDR]] to %class.C**
@@ -409,6 +571,272 @@ class C {
 // CK1: [[DONE]]
 // CK1: ret void
 
-#endif
+#endif // CK1
+
+
+///==========================================================================///
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix CK2 --check-prefix CK2-64 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix CK2 --check-prefix CK2-64 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix CK2 --check-prefix CK2-32 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix CK2 --check-prefix CK2-32 %s
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+#ifdef CK2
+// Nested mappers.
+
+class B {
+public:
+  double a;
+};
+
+class C {
+public:
+  double a;
+  B b;
+};
+
+#pragma omp declare mapper(B s) map(s.a)
+
+#pragma omp declare mapper(id: C s) map(s.b)
+
+// CK2: define {{.*}}void [[BMPRFUNC:@[.]omp_mapper[.].*B[.]default]](i8*{{.*}}, i8*{{.*}}, i8*{{.*}}, i64{{.*}}, i64{{.*}})
+
+// CK2-LABEL: define {{.*}}void @.omp_mapper.{{.*}}C{{.*}}.id(i8*{{.*}}, i8*{{.*}}, i8*{{.*}}, i64{{.*}}, i64{{.*}})
+// CK2: store i8* %{{[^,]+}}, i8** [[HANDLEADDR:%[^,]+]]
+// CK2: store i8* %{{[^,]+}}, i8** [[BPTRADDR:%[^,]+]]
+// CK2: store i8* %{{[^,]+}}, i8** [[VPTRADDR:%[^,]+]]
+// CK2: store i64 %{{[^,]+}}, i{{64|32}}* [[SIZEADDR:%[^,]+]]
+// CK2: store i64 %{{[^,]+}}, i64* [[TYPEADDR:%[^,]+]]
+// CK2-DAG: [[BYTESIZE:%.+]] = load i64, i64* [[SIZEADDR]]
+// CK2-DAG: [[SIZE:%.+]] = udiv exact i64 [[BYTESIZE]], 16
+// CK2-DAG: [[TYPE:%.+]] = load i64, i64* [[TYPEADDR]]
+// CK2-DAG: [[HANDLE:%.+]] = load i8*, i8** [[HANDLEADDR]]
+// CK2-DAG: [[PTRBEGIN:%.+]] = bitcast i8** [[VPTRADDR]] to %class.C**
+// CK2-DAG: [[PTREND:%.+]] = getelementptr %class.C*, %class.C** [[PTRBEGIN]], i64 [[SIZE]]
+// CK2-DAG: [[BPTR:%.+]] = load i8*, i8** [[BPTRADDR]]
+// CK2-DAG: [[BEGIN:%.+]] = load i8*, i8** [[VPTRADDR]]
+// CK2: [[ISARRAY:%.+]] = icmp sge i64 [[SIZE]], 1
+// CK2: br i1 [[ISARRAY]], label %[[INITEVALDEL:[^,]+]], label %[[LHEAD:[^,]+]]
+
+// CK2: [[INITEVALDEL]]
+// CK2: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8
+// CK2: [[ISNOTDEL:%.+]] = icmp eq i64 [[TYPEDEL]], 0
+// CK2: br i1 [[ISNOTDEL]], label %[[INIT:[^,]+]], label %[[LHEAD:[^,]+]]
+// CK2: [[INIT]]
+// CK2-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16
+// CK2-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], -4
+// CK2: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[ITYPE]])
+// CK2: br label %[[LHEAD:[^,]+]]
+
+// CK2: [[LHEAD]]
+// CK2: [[ISEMPTY:%.+]] = icmp eq %class.C** [[PTRBEGIN]], [[PTREND]]
+// CK2: br i1 [[ISEMPTY]], label %[[DONE:[^,]+]], label %[[LBODY:[^,]+]]
+// CK2: [[LBODY]]
+// CK2: [[PTR:%.+]] = phi %class.C** [ [[PTRBEGIN]], %[[LHEAD]] ], [ [[PTRNEXT:%.+]], %[[LCORRECT:[^,]+]] ]
+// CK2: [[OBJ:%.+]] = load %class.C*, %class.C** [[PTR]]
+// CK2-DAG: [[BBEGIN:%.+]] = getelementptr inbounds %class.C, %class.C* [[OBJ]], i32 0, i32 1
+// CK2-DAG: [[BEND:%.+]] = getelementptr %class.B, %class.B* [[BBEGIN]], i32 1
+// CK2-DAG: [[BBEGINV:%.+]] = bitcast %class.B* [[BBEGIN]] to i8*
+// CK2-DAG: [[BENDV:%.+]] = bitcast %class.B* [[BEND]] to i8*
+// CK2-DAG: [[BBEGINI:%.+]] = ptrtoint i8* [[BBEGINV]] to i64
+// CK2-DAG: [[BENDI:%.+]] = ptrtoint i8* [[BENDV]] to i64
+// CK2-DAG: [[BSIZE:%.+]] = sub i64 [[BENDI]], [[BBEGINI]]
+// CK2-DAG: [[BUSIZE:%.+]] = sdiv exact i64 [[BSIZE]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
+// CK2-DAG: [[BPTRADDR0BC:%.+]] = bitcast %class.C* [[OBJ]] to i8*
+// CK2-DAG: [[PTRADDR0BC:%.+]] = bitcast %class.B* [[BBEGIN]] to i8*
+// CK2-DAG: [[PRESIZE:%.+]] = call i64 @__tgt_mapper_num_components(i8* [[HANDLE]])
+// CK2-DAG: [[SHIPRESIZE:%.+]] = shl i64 [[PRESIZE]], 48
+// CK2-DAG: br label %[[MEMBER:[^,]+]]
+// CK2-DAG: [[MEMBER]]
+// CK2-DAG: br i1 true, label %[[LTYPE:[^,]+]], label %[[MEMBERCOM:[^,]+]]
+// CK2-DAG: [[MEMBERCOM]]
+// CK2-DAG: [[MEMBERCOMTYPE:%.+]] = add nuw i64 32, [[SHIPRESIZE]]
+// CK2-DAG: br label %[[LTYPE]]
+// CK2-DAG: [[LTYPE]]
+// CK2-DAG: [[MEMBERTYPE:%.+]] = phi i64 [ 32, %[[MEMBER]] ], [ [[MEMBERCOMTYPE]], %[[MEMBERCOM]] ]
+// CK2-DAG: [[TYPETF:%.+]] = and i64 [[TYPE]], 3
+// CK2-DAG: [[ISALLOC:%.+]] = icmp eq i64 [[TYPETF]], 0
+// CK2-DAG: br i1 [[ISALLOC]], label %[[ALLOC:[^,]+]], label %[[ALLOCELSE:[^,]+]]
+// CK2-DAG: [[ALLOC]]
+// CK2-DAG: [[ALLOCTYPE:%.+]] = and i64 [[MEMBERTYPE]], -4
+// CK2-DAG: br label %[[TYEND:[^,]+]]
+// CK2-DAG: [[ALLOCELSE]]
+// CK2-DAG: [[ISTO:%.+]] = icmp eq i64 [[TYPETF]], 1
+// CK2-DAG: br i1 [[ISTO]], label %[[TO:[^,]+]], label %[[TOELSE:[^,]+]]
+// CK2-DAG: [[TO]]
+// CK2-DAG: [[TOTYPE:%.+]] = and i64 [[MEMBERTYPE]], -3
+// CK2-DAG: br label %[[TYEND]]
+// CK2-DAG: [[TOELSE]]
+// CK2-DAG: [[ISFROM:%.+]] = icmp eq i64 [[TYPETF]], 2
+// CK2-DAG: br i1 [[ISFROM]], label %[[FROM:[^,]+]], label %[[TYEND]]
+// CK2-DAG: [[FROM]]
+// CK2-DAG: [[FROMTYPE:%.+]] = and i64 [[MEMBERTYPE]], -2
+// CK2-DAG: br label %[[TYEND]]
+// CK2-DAG: [[TYEND]]
+// CK2-DAG: [[TYPE0:%.+]] = phi i64 [ [[ALLOCTYPE]], %[[ALLOC]] ], [ [[TOTYPE]], %[[TO]] ], [ [[FROMTYPE]], %[[FROM]] ], [ [[MEMBERTYPE]], %[[TOELSE]] ]
+// CK2-64: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTRADDR0BC]], i8* [[PTRADDR0BC]], i64 [[BUSIZE]], i64 [[TYPE0]])
+// CK2-DAG: [[BPTRADDR1BC:%.+]] = bitcast %class.C* [[OBJ]] to i8*
+// CK2-DAG: [[PTRADDR1BC:%.+]] = bitcast %class.B* [[BBEGIN]] to i8*
+// CK2-DAG: br label %[[MEMBER:[^,]+]]
+// CK2-DAG: [[MEMBER]]
+// CK2-DAG: br i1 false, label %[[LTYPE:[^,]+]], label %[[MEMBERCOM:[^,]+]]
+// CK2-DAG: [[MEMBERCOM]]
+// 281474976710659 == 0x1,000,000,000,003
+// CK2-DAG: [[MEMBERCOMTYPE:%.+]] = add nuw i64 281474976710659, [[SHIPRESIZE]]
+// CK2-DAG: br label %[[LTYPE]]
+// CK2-DAG: [[LTYPE]]
+// CK2-DAG: [[MEMBERTYPE:%.+]] = phi i64 [ 281474976710659, %[[MEMBER]] ], [ [[MEMBERCOMTYPE]], %[[MEMBERCOM]] ]
+// CK2-DAG: [[TYPETF:%.+]] = and i64 [[TYPE]], 3
+// CK2-DAG: [[ISALLOC:%.+]] = icmp eq i64 [[TYPETF]], 0
+// CK2-DAG: br i1 [[ISALLOC]], label %[[ALLOC:[^,]+]], label %[[ALLOCELSE:[^,]+]]
+// CK2-DAG: [[ALLOC]]
+// CK2-DAG: [[ALLOCTYPE:%.+]] = and i64 [[MEMBERTYPE]], -4
+// CK2-DAG: br label %[[TYEND:[^,]+]]
+// CK2-DAG: [[ALLOCELSE]]
+// CK2-DAG: [[ISTO:%.+]] = icmp eq i64 [[TYPETF]], 1
+// CK2-DAG: br i1 [[ISTO]], label %[[TO:[^,]+]], label %[[TOELSE:[^,]+]]
+// CK2-DAG: [[TO]]
+// CK2-DAG: [[TOTYPE:%.+]] = and i64 [[MEMBERTYPE]], -3
+// CK2-DAG: br label %[[TYEND]]
+// CK2-DAG: [[TOELSE]]
+// CK2-DAG: [[ISFROM:%.+]] = icmp eq i64 [[TYPETF]], 2
+// CK2-DAG: br i1 [[ISFROM]], label %[[FROM:[^,]+]], label %[[TYEND]]
+// CK2-DAG: [[FROM]]
+// CK2-DAG: [[FROMTYPE:%.+]] = and i64 [[MEMBERTYPE]], -2
+// CK2-DAG: br label %[[TYEND]]
+// CK2-DAG: [[TYEND]]
+// CK2-DAG: [[TYPE1:%.+]] = phi i64 [ [[ALLOCTYPE]], %[[ALLOC]] ], [ [[TOTYPE]], %[[TO]] ], [ [[FROMTYPE]], %[[FROM]] ], [ [[MEMBERTYPE]], %[[TOELSE]] ]
+// CK2: call void [[BMPRFUNC]](i8* [[HANDLE]], i8* [[BPTRADDR1BC]], i8* [[PTRADDR1BC]], i64 8, i64 [[TYPE1]])
+// CK2: [[PTRNEXT]] = getelementptr %class.C*, %class.C** [[PTR]], i32 1
+// CK2: [[ISDONE:%.+]] = icmp eq %class.C** [[PTRNEXT]], [[PTREND]]
+// CK2: br i1 [[ISDONE]], label %[[LEXIT:[^,]+]], label %[[LBODY]]
+
+// CK2: [[LEXIT]]
+// CK2: [[ISARRAY:%.+]] = icmp sge i64 [[SIZE]], 1
+// CK2: br i1 [[ISARRAY]], label %[[EVALDEL:[^,]+]], label %[[DONE]]
+// CK2: [[EVALDEL]]
+// CK2: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8
+// CK2: [[ISDEL:%.+]] = icmp ne i64 [[TYPEDEL]], 0
+// CK2: br i1 [[ISDEL]], label %[[DEL:[^,]+]], label %[[DONE]]
+// CK2: [[DEL]]
+// CK2-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16
+// CK2-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], -4
+// CK2: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[DTYPE]])
+// CK2: br label %[[DONE]]
+// CK2: [[DONE]]
+// CK2: ret void
+
+#endif // CK2
+
+
+///==========================================================================///
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix CK3 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix CK3 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix CK3 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix CK3 %s
+
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm -femit-all-decls -disable-llvm-passes %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -femit-all-decls -disable-llvm-passes -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -femit-all-decls -disable-llvm-passes -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+#ifdef CK3
+// Map of array sections and nested components.
+
+// CK3-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}.region_id = weak constant i8 0
+// CK3: [[TYPES:@.+]] = {{.+}}constant [3 x i64] [i64 32, i64 281474976710659, i64 35]
+
+class C {
+public:
+  int a;
+  double *b;
+};
+
+class B {
+public:
+  C c;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a, s.b[0:2])
+
+// CK3: define {{.*}}void [[MPRFUNC:@[.]omp_mapper[.].*C[.]id]](i8*{{.*}}, i8*{{.*}}, i8*{{.*}}, i64{{.*}}, i64{{.*}})
+
+// CK3-LABEL: define {{.*}}void @{{.*}}foo{{.*}}
+void foo(int a){
+  // CK3-DAG: [[CVAL:%.+]] = alloca [10 x %class.C]
+  // CK3-DAG: [[BVAL:%.+]] = alloca %class.B
+  C c[10];
+  B b;
+
+  // CK3-DAG: [[BC:%.+]] = getelementptr inbounds %class.B, %class.B* [[BVAL]], i32 0, i32 0
+  // CK3-DAG: [[BCEND:%.+]] = getelementptr %class.C, %class.C* [[BC]], i32 1
+  // CK3-DAG: [[BCC:%.+]] = bitcast %class.C* [[BC]] to i8*
+  // CK3-DAG: [[BCENDC:%.+]] = bitcast %class.C* [[BCEND]] to i8*
+  // CK3-DAG: [[BCI:%.+]] = ptrtoint i8* [[BCC]] to i64
+  // CK3-DAG: [[BCENDI:%.+]] = ptrtoint i8* [[BCENDC]] to i64
+  // CK3-DAG: [[BSIZE:%.+]] = sub i64 [[BCENDI]], [[BCI]]
+  // CK3-DAG: [[BSIZED:%.+]] = sdiv exact i64 [[BSIZE]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
+
+  // CK3-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}}, i8** [[MPRGEP:%.+]])
+  // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK3-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SIZES:%[^,]+]], i32 0, i32 0
+  // CK3-DAG: [[MPRGEP]] = bitcast [3 x i8*]* [[MPR:%[^,]+]] to i8**
+  // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK3-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[SIZES]], i32 0, i32 0
+  // CK3-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 0
+  // CK3-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.B**
+  // CK3-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+  // CK3-DAG: store %class.B* [[BVAL]], %class.B** [[CBP1]]
+  // CK3-DAG: store %class.C* [[BC]], %class.C** [[CP1]]
+  // CK3-DAG: store i64 [[BSIZED]], i64* [[S1]]
+  // CK3-DAG: store i8* null, i8** [[MPR1]]
+  // CK3-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
+  // CK3-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
+  // CK3-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[SIZES]], i32 0, i32 1
+  // CK3-DAG: [[MPR2:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 1
+  // CK3-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to %class.B**
+  // CK3-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to %class.C**
+  // CK3-DAG: store %class.B* [[BVAL]], %class.B** [[CBP2]]
+  // CK3-DAG: store %class.C* [[BC]], %class.C** [[CP2]]
+  // CK3-64-DAG: store i64 16, i64* [[S2]]
+  // CK3-32-DAG: store i64 8, i64* [[S2]]
+  // CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR2]]
+  // CK3-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
+  // CK3-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2
+  // CK3-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[SIZES]], i32 0, i32 2
+  // CK3-DAG: [[MPR3:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 2
+  // CK3-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to [10 x %class.C]**
+  // CK3-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to %class.C**
+  // CK3-DAG: store [10 x %class.C]* [[CVAL]], [10 x %class.C]** [[CBP3]]
+  // CK3-DAG: [[CVALGEP:%.+]] = getelementptr inbounds {{.+}}[[CVAL]], i{{64|32}} 0, i{{64|32}} 0
+  // CK3-DAG: store %class.C* [[CVALGEP]], %class.C** [[CP3]]
+  // CK3-64-DAG: store i64 160, i64* [[S3]]
+  // CK3-32-DAG: store i64 80, i64* [[S3]]
+  // CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR3]]
+  // CK3: call void [[KERNEL:@.+]](%class.B* [[BVAL]], [10 x %class.C]* [[CVAL]])
+  #pragma omp target map(mapper(id),tofrom: c[0:10], b.c)
+  for (int i = 0; i < 10; i++) {
+    b.c.a += ++c[i].a;
+  }
+}
+
+
+// CK3: define internal void [[KERNEL]](%class.B* {{[^,]+}}, [10 x %class.C]* {{[^,]+}})
+
+#endif // CK3
 
-#endif
+#endif // HEADER
diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp
index ff0c6096c8422..dd62fa9d67308 100644
--- a/clang/test/OpenMP/declare_target_link_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_link_codegen.cpp
@@ -77,9 +77,9 @@ int maini1() {
 
 // HOST: [[BP0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASEPTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // HOST: [[P0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// HOST: call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP0]], i8** [[P0]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPTYPES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0))
+// HOST: call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP0]], i8** [[P0]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPTYPES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0), i8** null)
 // HOST: call void @__omp_offloading_{{.*}}_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* %{{[^,]+}})
-// HOST: call i32 @__tgt_target_teams(i64 -1, i8* @.__omp_offloading_{{.+}}_l47.region_id, i32 2, {{.+}})
+// HOST: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.__omp_offloading_{{.+}}_l47.region_id, i32 2, {{.+}})
 
 // HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* nonnull align {{[0-9]+}} dereferenceable{{.*}})
 // HOST: [[C:%.*]] = load i32, i32* @c,
diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp
index 4e8bcb44f63df..d4484ce377c87 100644
--- a/clang/test/OpenMP/distribute_codegen.cpp
+++ b/clang/test/OpenMP/distribute_codegen.cpp
@@ -278,7 +278,7 @@ void test_precond() {
 
 // HCHECK: load i16, i16*
 // HCHECK: store i16 %
-// HCHECK: call i32 @__tgt_target_teams(
+// HCHECK: call i32 @__tgt_target_teams_mapper(
 // HCHECK: call void @__kmpc_for_static_init_4(
 template 
 T ftemplate() {
diff --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
index 9aac97da86c21..5d0905231c0aa 100644
--- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -209,7 +209,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](
 // CHECK: ret
 
@@ -310,7 +310,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
index 278e18ec31157..9c45186522697 100644
--- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -200,7 +200,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -304,7 +304,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index 6b57290f32ef2..e0e2515fe630b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -110,25 +110,25 @@ int main() {
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
 
     // no schedule clauses
@@ -877,25 +877,25 @@ int main() {
 #else
   // CHECK-LABEL: @main
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
 
   // CHECK: call{{.+}} [[TMAIN:@.+]]()
@@ -1627,25 +1627,25 @@ int main() {
 // check code
 // CHECK: define{{.+}} [[TMAIN]]()
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
 
 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
diff --git a/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp
index 0629ba096d0c2..67e4615ae8c01 100644
--- a/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp
@@ -2,8 +2,17 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 template 
 T tmain(T argc) {
   int i;
@@ -14,12 +23,12 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -34,7 +43,7 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -62,12 +71,12 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -82,7 +91,7 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -98,5 +107,15 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return (tmain(argc) + tmain(argv[0][0])); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}}
 }
diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
index ed1b9774149aa..ee24b1cc18251 100644
--- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
@@ -69,7 +69,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -275,7 +275,7 @@ int main() {
 // CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
 
@@ -462,7 +462,7 @@ int main() {
 // CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
 
diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
index 7e7f24f3c9e8e..770a93bef5f73 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
@@ -23,9 +23,9 @@ int Arg;
 void gtid_test() {
 #pragma omp target
 #pragma omp teams
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp distribute parallel for
   for(int i = 0 ; i < 100; i++) {}
@@ -87,11 +87,11 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
 #pragma omp target
diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
index 087aef8b2b41a..5b24ede6adde3 100644
--- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -269,7 +269,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -484,7 +484,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
index 8d941391c75b4..61763660b7dfa 100644
--- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
@@ -22,7 +22,7 @@ void foo();
 struct S {
   intptr_t a, b, c;
   S(intptr_t a) : a(a) {}
-  operator char() { return a; }
+  operator char() { extern void mayThrow(); mayThrow(); return a; }
   ~S() {}
 };
 
@@ -44,9 +44,9 @@ int tmain() {
 int main() {
   S s(0);
   char a = s;
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 // CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
 // CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
@@ -82,16 +82,16 @@ int main() {
 
 // tmain 5
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
 
 // tmain 1
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
 
 // CHECK: define internal void [[T_OFFLOADING_FUN_0]](
diff --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
index a731031db7341..40bd67d685ab4 100644
--- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
@@ -164,7 +164,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
 
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
@@ -247,7 +247,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
 // CHECK: ret
diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
index 3e2a65e47f0e9..b04948acab1b6 100644
--- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
@@ -49,9 +49,9 @@ int main() {
   return tmain();
 }
 
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL2:@.+]]()
 // CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
 // CHECK: ret i32 [[CALL_RET]]
@@ -81,7 +81,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL3:@.+]]()
 
 // CHECK: define{{.+}} [[OFFL3]]()
diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
index dc2a3ca5350fd..6aa2f4b3ef129 100644
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
@@ -45,7 +45,7 @@ int main() {
 }
 
 // CHECK-LABEL: main
-// CHECK: call{{.+}} @__tgt_target_teams(
+// CHECK: call{{.+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFL:@.+]](
 // CHECK: call{{.+}} [[TMAIN:@.+]](i{{32|64}}
 // CHECK: ret
@@ -64,7 +64,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]](i{{32|64}}
-// CHECK: call{{.+}} @__tgt_target_teams(
+// CHECK: call{{.+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[TOFFL:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index 66576e1e8e241..fba510bea51f0 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -109,25 +109,25 @@ int main() {
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
 
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
 
     // no schedule clauses
@@ -876,25 +876,25 @@ int main() {
 #else
   // CHECK-LABEL: @main
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
 
-  // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+  // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
   // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
 
   // CHECK: call{{.+}} [[TMAIN:@.+]]()
@@ -1626,25 +1626,25 @@ int main() {
 // check code
 // CHECK: define{{.+}} [[TMAIN]]()
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
 
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
 
 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
index b9c5546ec5d95..9aab00f16c48f 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
@@ -2,8 +2,17 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s -Wuninitialized -DOMP51 -fopenmp-version=51
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized -DOMP51 -fopenmp-version=51
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 template 
 T tmain(T argc) {
   int i;
@@ -14,12 +23,12 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -34,7 +43,7 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -62,12 +71,12 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -82,7 +91,7 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -90,6 +99,15 @@ int main(int argc, char **argv) {
 #pragma omp distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (i = 0; i < argc; ++i)  // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
 
 #pragma omp parallel default(none) // expected-note 2 {{explicit data sharing attribute requested here}}
 #pragma omp target
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
index 8e96aa51c5323..9a605e0ce0211 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -69,7 +69,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -274,7 +274,7 @@ int main() {
 // CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
 
@@ -463,7 +463,7 @@ int main() {
 // CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
 
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
index dc8230d42ccac..cb10008f2d8a8 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
@@ -29,9 +29,9 @@ int Arg;
 void gtid_test() {
 #pragma omp target
 #pragma omp teams
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp distribute parallel for simd
   for(int i = 0 ; i < 100; i++) {}
@@ -93,11 +93,11 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
 #pragma omp target
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
index 9a36bbf29cae5..ff766b5e1a921 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -279,7 +279,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -494,7 +494,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
index 318fc1401963c..375c1d2fb8f59 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -22,7 +22,7 @@ void foo();
 struct S {
   intptr_t a, b, c;
   S(intptr_t a) : a(a) {}
-  operator char() { return a; }
+  operator char() { extern void mayThrow(); mayThrow(); return a; }
   ~S() {}
 };
 
@@ -44,9 +44,9 @@ int tmain() {
 int main() {
   S s(0);
   char a = s;
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 // CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
 // CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
@@ -82,16 +82,16 @@ int main() {
 
 // tmain 5
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
 
 // tmain 1
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
 
 // CHECK: define internal void [[T_OFFLOADING_FUN_0]](
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
index 629e023b6597f..9900cb3726014 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
@@ -164,7 +164,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
 
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
@@ -247,7 +247,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
 // CHECK: ret
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
index 716d7d7fa2e9a..5af0d797c0017 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -49,9 +49,9 @@ int main() {
   return tmain();
 }
 
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL2:@.+]]()
 // CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
 // CHECK: ret i32 [[CALL_RET]]
@@ -81,7 +81,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL3:@.+]]()
 
 // CHECK: define{{.+}} [[OFFL3]]()
diff --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp
index c470e8f012227..02b267734fa0d 100644
--- a/clang/test/OpenMP/distribute_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_private_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
@@ -159,7 +159,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](
 // CHECK: ret
 
@@ -191,7 +191,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp
index 7229c8095f0e2..32257a1ac119d 100644
--- a/clang/test/OpenMP/distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_codegen.cpp
@@ -312,7 +312,7 @@ void test_precond() {
 
 // HCHECK: load i16, i16*
 // HCHECK: store i16 %
-// HCHECK: call i32 @__tgt_target_teams(
+// HCHECK: call i32 @__tgt_target_teams_mapper(
 // HCHECK: call void @__kmpc_for_static_init_4(
 template 
 T ftemplate() {
diff --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
index 60557c1bfb650..2857e56022d3a 100644
--- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -208,7 +208,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](
 // CHECK: ret
 
@@ -306,7 +306,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
index 264b0e14ac449..2c6869feb2fc4 100644
--- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -208,7 +208,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -311,7 +311,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_simd_private_codegen.cpp
index f675158be694f..30773ea489a1a 100644
--- a/clang/test/OpenMP/distribute_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_private_codegen.cpp
@@ -70,7 +70,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
@@ -159,7 +159,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](
 // CHECK: ret
 
@@ -191,7 +191,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
index 63fb75e000cbb..d24a47be3e355 100644
--- a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
@@ -46,7 +46,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -123,7 +123,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -169,7 +169,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/driver.c b/clang/test/OpenMP/driver.c
index fa5bd1a8b5f8d..047478256f9f5 100644
--- a/clang/test/OpenMP/driver.c
+++ b/clang/test/OpenMP/driver.c
@@ -47,6 +47,7 @@
 // RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=31 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=40 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=45 | FileCheck --check-prefix=CHECK-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=51 | FileCheck --check-prefix=CHECK-VERSION %s
 
 // CHECK-VERSION-NOT: #define _OPENMP
 
diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp
index 26b09c574f3c7..71e481b3cd78b 100644
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -536,7 +536,7 @@ void test_precond() {
 }
 
 // TERM_DEBUG-LABEL: foo
-int foo() {return 0;};
+int foo() { extern void mayThrow(); mayThrow(); return 0;};
 
 // TERM_DEBUG-LABEL: parallel_for
 void parallel_for(float *a) {
diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp
index 45962b3ed2b1d..31168bc325e3a 100644
--- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp
@@ -203,9 +203,11 @@ int main() {
 // For + reduction operation initial value of private variable is -1.
 // CHECK: call void [[RED_INIT1:@.+]](float* %{{.+}}, float* %{{.+}})
 
+// CHECK: call void @_ZN1SIfEC1Ev([[S_FLOAT_TY]]* [[VAR_PRIV]]
 // For & reduction operation initial value of private variable is defined by call of 'init()' function.
 // CHECK: call void [[RED_INIT2:@.+]](
 
+// CHECK: call void @_ZN1SIfEC1Ev([[S_FLOAT_TY]]* [[VAR1_PRIV]]
 // For && reduction operation initial value of private variable is 1.0.
 // CHECK: call void [[RED_INIT3:@.+]](
 
@@ -598,6 +600,17 @@ int main() {
 // CHECK: br i1 [[DONE]],
 
 // Check initialization of private copy.
+// CHECK: [[BEGIN:%.+]] = getelementptr inbounds [10 x [4 x [[S_FLOAT_TY]]]], [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]], i32 0, i32 0, i32 0
+// CHECK: [[END:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[BEGIN]], i64 40
+// CHECK: br label %[[CTOR:[^,]+]]
+// CHECK: [[CTOR]]:
+// CHECK: [[CUR:%.+]] = phi [[S_FLOAT_TY]]* [ [[BEGIN]], %{{.+}} ], [ [[NEXT:%.+]], %[[CTOR]] ]
+// CHECK: call void @_ZN1SIfEC1Ev([[S_FLOAT_TY]]* [[CUR]])
+// CHECK: [[NEXT:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[CUR]], i64 1
+// CHECK: [[IS_DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* [[NEXT]], [[END]]
+// CHECK: br i1 [[IS_DONE]], label %[[DONE:[^,]+]], label %[[CTOR]]
+// CHECK: [[DONE]]:
+
 // CHECK: [[BEGIN:%.+]] = getelementptr inbounds [10 x [4 x [[S_FLOAT_TY]]]], [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]], i32 0, i32 0, i32 0
 // CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
 // CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[BEGIN]], i64 40
@@ -901,9 +914,11 @@ int main() {
 // For + reduction operation initial value of private variable is 0.
 // CHECK: call void [[RED_INIT6:@.+]](
 
+// CHECK: call void @_ZN1SIiEC1Ev([[S_INT_TY]]* [[VAR_PRIV]]
 // For & reduction operation initial value of private variable is ones in all bits.
 // CHECK: call void [[RED_INIT2:@.+]](
 
+// CHECK: call void @_ZN1SIiEC1Ev([[S_INT_TY]]* [[VAR1_PRIV]]
 // For && reduction operation initial value of private variable is 1.0.
 // CHECK: call void [[RED_INIT7:@.+]](
 
diff --git a/clang/test/OpenMP/for_simd_codegen.cpp b/clang/test/OpenMP/for_simd_codegen.cpp
index a668cb77a0683..5bb9811bcedf4 100644
--- a/clang/test/OpenMP/for_simd_codegen.cpp
+++ b/clang/test/OpenMP/for_simd_codegen.cpp
@@ -20,7 +20,7 @@
 #ifndef HEADER
 #define HEADER
 
-long long get_val() { return 0; }
+long long get_val() { extern void mayThrow(); mayThrow(); return 0; }
 double *g_ptr;
 
 // CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
@@ -785,7 +785,7 @@ void widened(float *a, float *b, float *c, float *d) {
 }
 
 // TERM_DEBUG-LABEL: bar
-int bar() {return 0;};
+int bar() { extern void mayThrow(); mayThrow(); return 0; };
 
 // TERM_DEBUG-LABEL: parallel_simd
 void parallel_simd(float *a) {
diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
new file mode 100644
index 0000000000000..552455eb97791
--- /dev/null
+++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
@@ -0,0 +1,110 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+//  %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o /tmp/t1 %s
+//  %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch /tmp/t1 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
+
+// expected-no-diagnostics
+
+// TODO: Teach the update script to check new functions too.
+
+#ifndef HEADER
+#define HEADER
+
+// ALL-LABEL: @_Z17nested_parallel_0v(
+// ALL-NEXT:  entry:
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// ALL:       omp_parallel:
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*))
+// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT12:%.*]]
+// ALL:       omp.par.outlined.exit12:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// ALL:       omp.par.exit.split:
+// ALL-NEXT:    ret void
+//
+void nested_parallel_0(void) {
+#pragma omp parallel
+  {
+#pragma omp parallel
+    {
+    }
+  }
+}
+
+// ALL-LABEL: @_Z17nested_parallel_1Pfid(
+// ALL-NEXT:  entry:
+// ALL-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// ALL-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// ALL-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// ALL-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// ALL-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// ALL-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// ALL:       omp_parallel:
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
+// ALL:       omp.par.outlined.exit13:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// ALL:       omp.par.exit.split:
+// ALL-NEXT:    ret void
+//
+void nested_parallel_1(float *r, int a, double b) {
+#pragma omp parallel
+  {
+#pragma omp parallel
+    {
+      *r = a + b;
+    }
+  }
+}
+
+// ALL-LABEL: @_Z17nested_parallel_2Pfid(
+// ALL-NEXT:  entry:
+// ALL-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// ALL-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// ALL-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// ALL-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// ALL-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// ALL-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// ALL:       omp_parallel:
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
+// ALL:       omp.par.outlined.exit55:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// ALL:       omp.par.exit.split:
+// ALL-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// ALL-NEXT:    [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double
+// ALL-NEXT:    [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8
+// ALL-NEXT:    [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]]
+// ALL-NEXT:    [[CONV58:%.*]] = fptrunc double [[ADD57]] to float
+// ALL-NEXT:    [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8
+// ALL-NEXT:    store float [[CONV58]], float* [[TMP2]], align 4
+// ALL-NEXT:    ret void
+//
+void nested_parallel_2(float *r, int a, double b) {
+#pragma omp parallel
+  {
+    *r = a + b;
+#pragma omp parallel
+    {
+      *r = a + b;
+#pragma omp parallel
+      {
+        *r = a + b;
+      }
+      *r = a + b;
+#pragma omp parallel
+      {
+        *r = a + b;
+      }
+      *r = a + b;
+    }
+    *r = a + b;
+  }
+  *r = a + b;
+}
+
+#endif
diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp
index 9a33f2f53b0d7..8554ad8e7deca 100644
--- a/clang/test/OpenMP/master_codegen.cpp
+++ b/clang/test/OpenMP/master_codegen.cpp
@@ -19,7 +19,7 @@
 
 // ALL:       define {{.*}}void [[FOO:@.+]]()
 
-void foo() {}
+void foo() { extern void mayThrow(); mayThrow(); }
 
 // ALL-LABEL: @main
 // TERM_DEBUG-LABEL: @main
diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp
index 46565443354ed..01542ca4044a8 100644
--- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp
@@ -101,7 +101,7 @@ void bar() {
 // CHECK: alloca float,
 // CHECK-NOT: alloca double,
 // CHECK: load float, float* %
-// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,
+// CHECK: store double {{.+}}, double* addrspacecast (double addrspace(3)* @bar_b to double*),
 }
 
 #pragma omp end declare target
diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp
index 2ee6bd2b4701c..1372246c7fc8c 100644
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp
+++ b/clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -55,7 +55,7 @@ void test_ds(){
 // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
 // CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1
 // CK1: store i32 10, i32* [[A]]
-// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1)
+// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}})
 // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1)
 // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
 // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]], i64 0
@@ -65,7 +65,7 @@ void test_ds(){
 // CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CK1: call void @__kmpc_end_sharing_variables()
 // CK1: store i32 100, i32* [[B]]
-// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1)
+// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}})
 // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS2]], i64 2)
 // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 0
diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
index 8fe918b043cf4..38e9f0d03efd6 100644
--- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp
+++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
@@ -131,7 +131,7 @@ int main(int argc, char **argv) {
 
 // HOST-LABEL: @main
 
-// HOST-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 11, i8** [[BASES:%.+]], i8** [[PTRS:%.+]],
+// HOST-DAG: call i32 @__tgt_target_mapper(i64 -1, i8* @{{.+}}, i32 11, i8** [[BASES:%.+]], i8** [[PTRS:%.+]],
 // HOST-DAG: [[BASES:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR:%.+]], i32 0, i32 0
 // HOST-DAG: [[PTRS:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR:%.+]], i32 0, i32 0
 // HOST-DAG: [[BASE_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR]], i32 0, i32 6
diff --git a/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp
index 033794726ff0c..3a7b0d8fbaf4f 100644
--- a/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp
+++ b/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp
@@ -78,7 +78,7 @@ int main()
 // actual target invocation
 // CHECK: [[BASES_GEP:%.+]] = getelementptr {{.+}} [3 x {{.+}}*], [3 x {{.+}}*]* [[BASE_PTRS]], {{.+}} 0, {{.+}} 0
 // CHECK: [[PTRS_GEP:%.+]] = getelementptr {{.+}} [3 x {{.+}}*], [3 x {{.+}}*]* [[PTRS]], {{.+}} 0, {{.+}} 0
-// CHECK: {{%.+}} = call{{.+}} @__tgt_target_teams({{.+}}, {{.+}}, {{.+}}, i8** [[BASES_GEP]], i8** [[PTRS_GEP]], i[[PTRSZ]]* getelementptr inbounds ([3 x i{{.+}}], [3 x i{{.+}}]* [[SIZES]], i{{.+}} 0, i{{.+}} 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[TYPES]], i{{.+}} 0, i{{.+}} 0), {{.+}}, {{.+}})
+// CHECK: {{%.+}} = call{{.+}} @__tgt_target_teams_mapper({{.+}}, {{.+}}, {{.+}}, i8** [[BASES_GEP]], i8** [[PTRS_GEP]], i[[PTRSZ]]* getelementptr inbounds ([3 x i{{.+}}], [3 x i{{.+}}]* [[SIZES]], i{{.+}} 0, i{{.+}} 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[TYPES]], i{{.+}} 0, i{{.+}} 0), i8** null, {{.+}}, {{.+}})
 
 
   omp_loop(0,100,body);
@@ -120,6 +120,6 @@ int main()
 // actual target invocation
 // CHECK: [[BASES_GEP:%.+]] = getelementptr {{.+}} [5 x {{.+}}*], [5 x {{.+}}*]* [[BASE_PTRS]], {{.+}} 0, {{.+}} 0
 // CHECK: [[PTRS_GEP:%.+]] = getelementptr {{.+}} [5 x {{.+}}*], [5 x {{.+}}*]* [[PTRS]], {{.+}} 0, {{.+}} 0
-// CHECK: {{%.+}} = call{{.+}} @__tgt_target_teams({{.+}}, {{.+}}, {{.+}}, i8** [[BASES_GEP]], i8** [[PTRS_GEP]], i[[PTRSZ]]* getelementptr inbounds ([5 x i{{.+}}], [5 x i{{.+}}]* [[SIZES_TEMPLATE]], i{{.+}} 0, i{{.+}} 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[TYPES_TEMPLATE]], i{{.+}} 0, i{{.+}} 0), {{.+}}, {{.+}})
+// CHECK: {{%.+}} = call{{.+}} @__tgt_target_teams_mapper({{.+}}, {{.+}}, {{.+}}, i8** [[BASES_GEP]], i8** [[PTRS_GEP]], i[[PTRSZ]]* getelementptr inbounds ([5 x i{{.+}}], [5 x i{{.+}}]* [[SIZES_TEMPLATE]], i{{.+}} 0, i{{.+}} 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[TYPES_TEMPLATE]], i{{.+}} 0, i{{.+}} 0), i8** null, {{.+}}, {{.+}})
 
 #endif
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index c8b15c8f6e3ba..ad25e0d775d12 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -92,7 +92,7 @@ int bar(int n){
 //
 // CHECK: [[AWAIT_WORK]]
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]]
-// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]
+// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -166,13 +166,13 @@ int bar(int n){
 // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
 // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
 // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*),
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*))
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_serialized_parallel(
 // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]](
 // CHECK: call void @__kmpc_end_serialized_parallel(
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*),
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*))
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK-64-DAG: load i32, i32* [[REF_A]]
@@ -211,7 +211,7 @@ int bar(int n){
 //
 // CHECK: [[AWAIT_WORK]]
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
-// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]],
+// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -291,7 +291,7 @@ int bar(int n){
 // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK: [[IF_THEN]]
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*),
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*))
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: br label {{%?}}[[IF_END:.+]]
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 91f31185d8c1a..56f04cb01f0aa 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -612,7 +612,7 @@ int baz(int f, double &a) {
 // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
 // CHECK: br label
 
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*))
 // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2)
 // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]],
 // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0
diff --git a/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp b/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
index 877aa7ab0b622..90fc2b21c2f6e 100644
--- a/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
+++ b/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
@@ -71,7 +71,7 @@ int bar(int n){
 // CHECK-HOST: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-HOST: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
 
-// CHECK-HOST: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0))
+// CHECK-HOST: call i32 @__tgt_target_mapper(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0), i8** null)
 
 // CHECK-DEVICE: [[VAR_LINK:@.+]] = weak global double* null
 // CHECK-DEVICE: [[VAR_TO:@.+]] = weak global double* null
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index 3ab955fa85080..8ff393f074e4a 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -68,7 +68,7 @@ int bar(int n){
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1)
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -154,7 +154,7 @@ int bar(int n){
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1)
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
index fe294bbddf2b7..4f23f18730cc2 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
@@ -88,7 +88,7 @@ int bar(int n){
   // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0
   //
   // CHECK: call void @__kmpc_for_static_init_4(
-  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
+  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*))
   // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1)
   // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]],
   // CHECK: [[VARS_BUF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED_VARS_BUF]], i{{64|32}} 0
diff --git a/clang/test/OpenMP/openmp_offload_codegen.cpp b/clang/test/OpenMP/openmp_offload_codegen.cpp
index 8df1745054d32..4e3cf82bf8311 100644
--- a/clang/test/OpenMP/openmp_offload_codegen.cpp
+++ b/clang/test/OpenMP/openmp_offload_codegen.cpp
@@ -35,8 +35,9 @@ void target_maps_parallel_integer(int a){
 // CK1: [[GEPOP:%.+]] = getelementptr inbounds {{.*}}
 // CK1: [[GEPOPBIT:%.+]] = bitcast i8** [[GEPOP]]
 // CK1: store i32* %ParamToKernel, i32** [[GEPOPBIT]]
+// CK1: [[GEPMAPPERARG:%.+]] = getelementptr inbounds {{.*}}
 // CK1: [[GEPOBPARG:%.+]] = getelementptr inbounds {{.*}}
 // CK1: [[GEPOPARG:%.+]] = getelementptr inbounds {{.*}}
-// CK1: call {{.*}}tgt_target({{.*}}i8** [[GEPOBPARG]], i8** [[GEPOPARG]]
+// CK1: call {{.*}}tgt_target_mapper({{.*}}i8** [[GEPOBPARG]], i8** [[GEPOPARG]]{{.*}}, i8** null)
 
 #endif
diff --git a/clang/test/OpenMP/parallel_default_messages.cpp b/clang/test/OpenMP/parallel_default_messages.cpp
index 6b8ad67051850..b098c43852a85 100644
--- a/clang/test/OpenMP/parallel_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_default_messages.cpp
@@ -4,18 +4,25 @@
 // RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=40 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-version=31 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-version=30 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=51 -fopenmp -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=51 -fopenmp-simd -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
 
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   const int c = 0;
 
   #pragma omp parallel default // expected-error {{expected '(' after 'default'}}
-  #pragma omp parallel default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  #pragma omp parallel default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
-  #pragma omp parallel default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
-  #pragma omp parallel default (shared), default(shared) // expected-error {{directive '#pragma omp parallel' cannot contain more than one 'default' clause}}
-  #pragma omp parallel default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel default(  // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
+#pragma omp parallel default(none                     // expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel default(shared), default(shared) // expected-error {{directive '#pragma omp parallel' cannot contain more than one 'default' clause}}
+#pragma omp parallel default(x)                       // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
   #pragma omp parallel default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -27,5 +34,14 @@ int main(int argc, char **argv) {
 
   #pragma omp parallel default(none) // ge40-note {{explicit data sharing attribute requested here}}
   (void)c; // ge40-error {{variable 'c' must have explicitly specified data sharing attributes}}
+
+#ifdef OMP51
+#pragma omp parallel default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp
index de445634470bb..4ef6e8228808f 100644
--- a/clang/test/OpenMP/parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_codegen.cpp
@@ -372,7 +372,7 @@ void runtime(float *a, float *b, float *c, float *d) {
 }
 
 // TERM_DEBUG-LABEL: foo
-int foo() {return 0;};
+int foo() { extern void mayThrow(); mayThrow(); return 0; };
 
 // TERM_DEBUG-LABEL: parallel_for
 // CLEANUP: parallel_for
diff --git a/clang/test/OpenMP/parallel_for_default_messages.cpp b/clang/test/OpenMP/parallel_for_default_messages.cpp
index b02fa8803a3b3..c64b76948c018 100644
--- a/clang/test/OpenMP/parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp parallel for default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel for default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for default(shared), default(shared) // expected-error {{directive '#pragma omp parallel for' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifdef OMP51
+#pragma omp parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_for_simd_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_codegen.cpp
index e9cc2f302eafc..715328771ccce 100644
--- a/clang/test/OpenMP/parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_codegen.cpp
@@ -22,7 +22,7 @@
 #ifndef HEADER
 #define HEADER
 
-long long get_val() { return 0; }
+long long get_val() { extern void mayThrow(); mayThrow(); return 0; }
 double *g_ptr;
 
 // CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
@@ -801,7 +801,7 @@ for (int i = 0; i < 10; ++i);
 // OMP50-DAG: ![[NOVM]] = !{!"llvm.loop.vectorize.enable", i1 false}
 
 // TERM_DEBUG-LABEL: bar
-int bar() {return 0;};
+int bar() { extern void mayThrow(); mayThrow(); return 0; };
 
 // TERM_DEBUG-LABEL: parallel_simd
 void parallel_simd(float *a) {
diff --git a/clang/test/OpenMP/parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/parallel_for_simd_default_messages.cpp
index 570ee14bbc84b..6368d280de5db 100644
--- a/clang/test/OpenMP/parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp parallel for simd' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}} expected-error {{variable 'i' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifdef OMP51
+#pragma omp parallel for simd default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    x++; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    y++; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp
index 9ffa941314b98..98993e05c8530 100644
--- a/clang/test/OpenMP/parallel_master_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_codegen.cpp
@@ -18,7 +18,7 @@
 // CK1-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 
 // CK1-LABEL: foo
-void foo() {}
+void foo() { extern void mayThrow(); mayThrow(); }
 
 void parallel_master() {
 #pragma omp parallel master
@@ -118,6 +118,162 @@ void parallel_master_private() {
 
 #endif
 
+#ifdef CK31
+///==========================================================================///
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK31
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK31
+
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// CK31-DAG:   %struct.ident_t = type { i32, i32, i32, i32, i8* }
+// CK31-DAG:   [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+
+void parallel_master_default_firstprivate() {
+  int a;
+#pragma omp parallel master default(firstprivate)
+  a++;
+}
+
+// CK31-LABEL: define void @{{.+}}parallel_master{{.+}}
+// CK31:       [[A_VAL:%.+]] = alloca i32{{.+}}
+// CK31:       [[A_CASTED:%.+]] = alloca i64
+// CK31:       [[ZERO_VAL:%.+]] = load i32, i32* [[A_VAL]]
+// CK31:       [[CONV:%.+]] = bitcast i64* [[A_CASTED]] to i32*
+// CK31:       store i32 [[ZERO_VAL]], i32* [[CONV]]
+// CK31:       [[ONE_VAL:%.+]] = load i64, i64* [[A_CASTED]]
+// CK31:       call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[ONE_VAL]])
+// CK31:       ret void
+
+// CK31:       [[GLOBAL_TID_ADDR:%.+]] = alloca i32*
+// CK31:       [[BOUND_TID_ADDR:%.+]] = alloca i32*
+// CK31:       [[A_ADDR:%.+]] = alloca i64{{.+}}
+// CK31:       store i32* [[GLOBAL_TID:%.+]], i32** [[GLOBAL_TID_ADDR]]{{.+}}
+// CK31:       store i32* [[BOUND_TID:%.+]], i32** [[BOUND_TID_ADDR]]
+// CK31:       store i64 [[A_VAL]], i64* [[A_ADDR]]
+// CK31:       [[CONV]] = bitcast i64* [[A_ADDR]]
+// CK31:       [[ZERO_VAL]] = load i32*, i32** [[GLOBAL_TID_ADDR]]
+// CK31:       [[ONE_VAL]] = load i32, i32* [[ZERO_VAL]]
+// CK31:       [[TWO_VAL:%.+]] = call i32 @__kmpc_master(%struct.ident_t* @0, i32 [[ONE_VAL]])
+// CK31:       [[THREE:%.+]] = icmp ne i32 [[TWO_VAL]], 0
+// CK31:       br i1 [[THREE]], label [[OMP_IF_THEN:%.+]], label [[OMP_IF_END:%.+]]
+
+// CK31:       [[FOUR:%.+]] = load i32, i32* [[CONV:%.+]]
+// CK31:       [[INC:%.+]] = add nsw i32 [[FOUR]]
+// CK31:       store i32 [[INC]], i32* [[CONV]]
+// CK31:       call void @__kmpc_end_master(%struct.ident_t* @0, i32 [[ONE_VAL]])
+// CK31:       br label [[OMP_IF_END]]
+
+// CK31:       ret void
+
+#endif
+
+#ifdef CK32
+///==========================================================================///
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK32
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK32
+
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// CK32-DAG:   %struct.ident_t = type { i32, i32, i32, i32, i8* }
+// CK32-DAG:   [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+
+struct St {
+  int a, b;
+  static int y;
+  St() : a(0), b(0) {}
+  ~St() {}
+};
+int St::y = 0;
+
+void parallel_master_default_firstprivate() {
+  St a = St();
+  static int y = 0;
+#pragma omp parallel master default(firstprivate)
+  {
+    a.a += 1;
+    a.b += 1;
+    y++;
+    a.y++;
+  }
+}
+
+// CK32-LABEL: define {{.+}} @{{.+}}parallel_master_default_firstprivate{{.+}}
+// CK32: [[A_VAL:%.+]] = alloca %struct.St{{.+}}
+// CK32: [[Y_CASTED:%.+]] = alloca i64
+// CK32: call void @[[CTOR:.+]](%struct.St* [[A_VAL]])
+// CK32: [[ZERO:%.+]] = load i32, i32* @{{.+}}parallel_master_default_firstprivate{{.+}}
+// CK32: [[CONV:%.+]] = bitcast i64* [[Y_CASTED]] to i32*
+// CK32: store i32 [[ZERO]], i32* [[CONV]]
+// CK32: [[ONE:%.+]] = load i64, i64* [[Y_CASTED]]
+// CK32: call void {{.+}}@{{.+}} %struct.St* [[A_VAL]], i64 [[ONE]])
+// CK32: call void [[DTOR:@.+]](%struct.St* [[A_VAL]])
+
+// CK32: [[THIS_ADDR:%.+]] = alloca %struct.St*
+// CK32: store %struct.St* [[THIS:%.+]], %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE:%.+]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+// CK32: call void [[CTOR_2:.+]](%struct.St* [[THIS_ONE]])
+// CK32: ret void
+
+// CK32: [[GLOBAL_TID_ADDR:%.+]] = alloca i32*
+// CK32: [[BOUND_TID_ADDR:%.+]] = alloca i32*
+// CK32: [[A_ADDR:%.+]] = alloca %struct.St
+// CK32: [[Y_ADDR:%.+]] = alloca i64
+// CK32: store i32* [[GLOBAL_TID:%.+]], i32** [[GLOBAL_TID_ADDR]]
+// CK32: store i32* %.bound_tid., i32** [[BOUND_TID_ADDR]]
+// CK32: store %struct.St* [[A_VAL]], %struct.St** [[A_ADDR]]{{.+}}
+// CK32: store i64 [[Y:%.+]], i64* [[Y_ADDR]]
+// CK32: [[ONE:%.+]] = load i32*, i32** [[GLOBAL_TID_ADDR]]
+// CK32: [[TWO:%.+]] = load i32, i32* [[ONE]]
+// CK32: [[THREE:%.+]] = call i32 @{{.+}} i32 [[TWO]])
+// CK32: [[FOUR:%.+]] = icmp ne i32 [[THREE]], 0
+// CK32: br i1 [[FOUR]], label [[IF_THEN:%.+]], label [[IF_END:%.+]]
+
+// CK32: [[A_1:%.+]] = getelementptr inbounds %struct.St, %struct.St* [[ZERO]], i32 0, i32 0
+// CK32: [[FIVE:%.+]] = load i32, i32* [[A_1]]
+// CK32: [[ADD:%.+]] = add nsw i32 [[FIVE]], 1
+// CK32: store i32 [[ADD]], i32* [[A_1]]
+// CK32: [[B:%.+]] = getelementptr inbounds %struct.St, %struct.St* [[ZERO]], i32 0, i32 1
+// CK32: [[SIX:%.+]] = load i32, i32* [[B]]
+// CK32: [[ADD_2:%.+]] = add nsw i32 [[SIX]], 1
+// CK32: store i32 [[ADD_2]], i32* [[B]]
+// CK32: [[SEVEN:%.+]] = load i32, i32* [[CONV]]
+// CK32: [[INC:%.+]] = add nsw i32 [[SEVEN]], 1
+// CK32: store i32 [[INC]], i32* [[CONV]]
+// CK32: [[EIGHT:%.+]] = load i32, i32* [[FUNC:@.+]]
+// CK32: [[INC_3:%.+]] = add nsw i32 [[EIGHT]], 1
+// CK32: store i32 [[INC_3]], i32* @{{.+}}
+// CK32: call void @{{.+}} i32 [[TWO]])
+// CK32: br label [[IF_END]]
+
+// CK32: [[DTOR]](%struct.St* [[THIS]])
+// CK32: [[THIS_ADDR]] = alloca %struct.St*
+// CK32: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+// CK32: call void @_ZN2StD2Ev(%struct.St* [[THIS_ONE]])
+
+// CK32: [[THIS_ADDR]] = alloca %struct.St*
+// CK32: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+// CK32: [[A_VAL]] = getelementptr inbounds %struct.St, %struct.St* [[THIS_ONE]], i32 0, i32 0
+// CK32: store i32 0, i32* [[A_VAL]]
+// CK32: [[B_VAL:%.+]] = getelementptr inbounds %struct.St, %struct.St* [[THIS_ONE]], i32 0, i32 1
+// CK32: store i32 0, i32* [[B_VAL]]
+// CK32: ret void
+
+// CK32: [[THIS_ADDR:%.+]] = alloca %struct.St*
+// CK32: store %struct.St* %this, %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+
+#endif
+
 #ifdef CK4
 ///==========================================================================///
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK4
diff --git a/clang/test/OpenMP/parallel_master_default_messages.cpp b/clang/test/OpenMP/parallel_master_default_messages.cpp
index 557cba5aa322a..39f78ea53ae16 100644
--- a/clang/test/OpenMP/parallel_master_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_default_messages.cpp
@@ -2,20 +2,29 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp parallel master default // expected-error {{expected '(' after 'default'}}
   {
-#pragma omp parallel master default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel master default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
     {
-#pragma omp parallel master default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel master default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
       {
 #pragma omp parallel master default(none // expected-error {{expected ')'}} expected-note {{to match this '('}}
         {
 #pragma omp parallel master default(shared), default(shared) // expected-error {{directive '#pragma omp parallel master' cannot contain more than one 'default' clause}}
           {
-#pragma omp parallel master default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel master default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
             {
               foo();
             }
@@ -37,5 +46,14 @@ int main(int argc, char **argv) {
       ++argc;  // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     }
   }
+
+#ifdef OMP51
+#pragma omp parallel master default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
index 79615b9341687..47109ffc7af37 100644
--- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -22,7 +22,7 @@ void foo();
 struct S {
   intptr_t a, b, c;
   S(intptr_t a) : a(a) {}
-  operator char() { return a; }
+  operator char() { extern void mayThrow(); mayThrow(); return a; }
   ~S() {}
 };
 
diff --git a/clang/test/OpenMP/parallel_sections_codegen.cpp b/clang/test/OpenMP/parallel_sections_codegen.cpp
index eadc4937203a3..bee078050256f 100644
--- a/clang/test/OpenMP/parallel_sections_codegen.cpp
+++ b/clang/test/OpenMP/parallel_sections_codegen.cpp
@@ -10,9 +10,9 @@
 #ifndef HEADER
 #define HEADER
 // CHECK-LABEL: foo
-void foo() {};
+void foo() { extern void mayThrow(); mayThrow(); };
 // CHECK-LABEL: bar
-void bar() {};
+void bar() { extern void mayThrow(); mayThrow(); };
 
 template 
 T tmain() {
diff --git a/clang/test/OpenMP/parallel_sections_default_messages.cpp b/clang/test/OpenMP/parallel_sections_default_messages.cpp
index d6a10fe56b344..cfa95445fb536 100644
--- a/clang/test/OpenMP/parallel_sections_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_sections_default_messages.cpp
@@ -7,15 +7,15 @@ void foo();
 int main(int argc, char **argv) {
 #pragma omp parallel sections default // expected-error {{expected '(' after 'default'}}
   {
-#pragma omp parallel sections default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel sections default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
     {
-#pragma omp parallel sections default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel sections default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
       {
 #pragma omp parallel sections default(none // expected-error {{expected ')'}} expected-note {{to match this '('}}
         {
 #pragma omp parallel sections default(shared), default(shared) // expected-error {{directive '#pragma omp parallel sections' cannot contain more than one 'default' clause}}
           {
-#pragma omp parallel sections default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel sections default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
             {
               foo();
             }
diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
new file mode 100644
index 0000000000000..163f0b92468af
--- /dev/null
+++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1                                 -verify=host                                                              -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1                                 -verify=all,safe                                                          -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe                                                          -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+
+// host-no-diagnostics
+
+void bar1(void) {
+#pragma omp parallel // #0
+                     // all-remark@#0 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // safe-remark@#0 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
+                     // force-remark@#0 {{[UNSAFE] Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will rewrite the state machine use due to command line flag, this can lead to undefined behavior if the parallel region is called from a target region outside this translation unit.}}
+                     // force-remark@#0 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: }}
+  {
+  }
+}
+void bar2(void) {
+#pragma omp parallel // #1
+                     // all-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // safe-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
+                     // force-remark@#1 {{[UNSAFE] Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will rewrite the state machine use due to command line flag, this can lead to undefined behavior if the parallel region is called from a target region outside this translation unit.}}
+                     // force-remark@#1 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__6_wrapper, kernel ID: }}
+  {
+  }
+}
+
+void foo1(void) {
+#pragma omp target teams // #2
+                         // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+  {
+#pragma omp parallel // #3
+                     // all-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // all-remark@#3 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+    bar1();
+#pragma omp parallel // #4
+                     // all-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // all-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+  }
+}
+
+void foo2(void) {
+#pragma omp target teams // #5
+                         // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}}
+  {
+#pragma omp parallel // #6
+                     // all-remark@#6 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // all-remark@#6 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+    bar1();
+    bar2();
+#pragma omp parallel // #7
+                     // all-remark@#7 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // all-remark@#7 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+    bar1();
+    bar2();
+  }
+}
+
+void foo3(void) {
+#pragma omp target teams // #8
+                         // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__9_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__10_wrapper, kernel ID: __omp_offloading}}
+  {
+#pragma omp parallel // #9
+                     // all-remark@#9 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // all-remark@#9 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__9_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+    bar1();
+    bar2();
+#pragma omp parallel // #10
+                     // all-remark@#10 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
+                     // all-remark@#10 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__10_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+    bar1();
+    bar2();
+  }
+}
+
+void spmd(void) {
+  // Verify we do not emit the remarks above for "SPMD" regions.
+#pragma omp target teams
+#pragma omp parallel
+  {
+  }
+
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 100; ++i) {
+  }
+}
+
+// all-remark@* 3 {{OpenMP runtime call __kmpc_global_thread_num moved to}}
+// all-remark@* 3 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
new file mode 100644
index 0000000000000..97507041e1953
--- /dev/null
+++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1                                 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1                                 -verify      -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify      -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
+
+// host-no-diagnostics
+
+void bar(void) {
+#pragma omp parallel // #1                                                                                                                                                                                                                                                                                                                                           \
+                     // expected-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
+                     // expected-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
+  {
+  }
+}
+
+void foo(void) {
+#pragma omp target teams // #2                                                                                                                                                                      \
+                         // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}} \
+                         // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+  {
+#pragma omp parallel // #3                                                                                                                                                                                                                                                                                                                                           \
+                     // expected-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
+                     // expected-remark@#3 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+    bar();
+#pragma omp parallel // #4                                                                                                                                                                                                                                                                                                                                           \
+                     // expected-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
+                     // expected-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+    {
+    }
+  }
+}
+
+void spmd(void) {
+  // Verify we do not emit the remarks above for "SPMD" regions.
+#pragma omp target teams
+#pragma omp parallel
+  {
+  }
+
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 100; ++i) {
+  }
+}
+
+// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}}
+// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp
index 68fd38f7d0bba..d33e79238459f 100644
--- a/clang/test/OpenMP/sections_codegen.cpp
+++ b/clang/test/OpenMP/sections_codegen.cpp
@@ -12,9 +12,9 @@
 // CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
 // CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8*
 // CHECK-LABEL: foo
-void foo() {};
+void foo() { extern void mayThrow(); mayThrow(); };
 // CHECK-LABEL: bar
-void bar() {};
+void bar() { extern void mayThrow(); mayThrow(); };
 
 template 
 T tmain() {
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp
index cb53bb1aa38b8..8ba87dce82fcb 100644
--- a/clang/test/OpenMP/simd_codegen.cpp
+++ b/clang/test/OpenMP/simd_codegen.cpp
@@ -26,7 +26,7 @@
 // OMP50-DAG: [[LAST_IV:@.+]] = {{.*}}common global i64 0
 // OMP50-DAG: [[LAST_A:@.+]] = {{.*}}common global i32 0
 
-long long get_val() { return 0; }
+long long get_val() { extern void mayThrow(); mayThrow(); return 0; }
 double *g_ptr;
 
 struct S {
@@ -798,7 +798,7 @@ void bartfoo() {
 
 #endif // OMP5
 // TERM_DEBUG-LABEL: bar
-int bar() {return 0;};
+int bar() { extern void mayThrow(); mayThrow(); return 0; };
 
 // TERM_DEBUG-LABEL: parallel_simd
 void parallel_simd(float *a) {
diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp
index a56cdb0ae81a4..1d88c2808ed29 100644
--- a/clang/test/OpenMP/single_codegen.cpp
+++ b/clang/test/OpenMP/single_codegen.cpp
@@ -42,7 +42,7 @@ TestClass tc;
 TestClass tc2[2];
 #pragma omp threadprivate(tc, tc2)
 
-void foo() {}
+void foo() { extern void mayThrow(); mayThrow(); }
 
 struct SS {
   int a;
diff --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp
index b8fd7cf34b37a..9cec6bfa5a48f 100644
--- a/clang/test/OpenMP/target_codegen.cpp
+++ b/clang/test/OpenMP/target_codegen.cpp
@@ -127,7 +127,7 @@ int foo(int n) {
   // CHECK:       store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
   // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -142,7 +142,7 @@ int foo(int n) {
   // CHECK-DAG:   store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]],
   // CHECK-DAG:   [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK-DAG:   [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** null)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -178,7 +178,7 @@ int foo(int n) {
     global += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i8** null)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -202,7 +202,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -258,7 +258,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -537,7 +537,7 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i8** null)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -617,7 +617,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i8** null)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -668,7 +668,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -724,7 +724,7 @@ int bar(int n){
 
 // OMP45:       [[BPR:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
 // OMP45:       [[PR:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
-// OMP45:       [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET9]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT10]], i32 0, i32 0))
+// OMP45:       [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET9]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT10]], i32 0, i32 0), i8** null)
 // OMP45-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // OMP45-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // OMP45:       [[FAIL]]
@@ -819,7 +819,7 @@ int bar(int n){
 
 // OMP50:       [[BPR:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
 // OMP50:       [[PR:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
-// OMP50:       [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET9]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT10]], i32 0, i32 0))
+// OMP50:       [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET9]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT10]], i32 0, i32 0), i8** null)
 // OMP50-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // OMP50-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // OMP50:       [[FAIL]]
diff --git a/clang/test/OpenMP/target_data_codegen.cpp b/clang/test/OpenMP/target_data_codegen.cpp
index f1c9f621bf748..274b3e16b2f69 100644
--- a/clang/test/OpenMP/target_data_codegen.cpp
+++ b/clang/test/OpenMP/target_data_codegen.cpp
@@ -50,7 +50,7 @@ void foo(int arg) {
   float lb[arg];
 
   // Region 00
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
   // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -65,7 +65,7 @@ void foo(int arg) {
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
   // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
@@ -81,7 +81,7 @@ void foo(int arg) {
   // Region 02
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -100,7 +100,7 @@ void foo(int arg) {
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   // CK1: br label %[[IFEND:[^,]+]]
@@ -111,7 +111,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 03
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -129,7 +129,7 @@ void foo(int arg) {
   // CK1-32-DAG: [[CSVAL032]] = mul nuw i32 %{{[^,]+}}, 4
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]]
@@ -140,7 +140,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -162,7 +162,7 @@ void foo(int arg) {
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   #pragma omp target data map(to: gb.b[:3])
@@ -172,7 +172,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 05
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -190,7 +190,7 @@ void foo(int arg) {
   // CK1-32-DAG: [[CSVAL032]] = mul nuw i32 %{{[^,]+}}, 4
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]]
@@ -201,7 +201,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 06
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -219,7 +219,7 @@ void foo(int arg) {
   // CK1-32-DAG: [[CSVAL032]] = mul nuw i32 %{{[^,]+}}, 4
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]]
@@ -269,7 +269,7 @@ int bar(int arg){
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: call void @__tgt_target_data_begin_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]]
@@ -307,7 +307,7 @@ int bar(int arg){
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: call void @__tgt_target_data_end_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
@@ -387,7 +387,7 @@ int bar(int arg){
 // Region 00
 // CK4: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK4: [[IFTHEN]]
-// CK4-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK4-DAG: call void @__tgt_target_data_begin_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK4-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK4-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK4-DAG: [[GEPBP]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]]
@@ -425,7 +425,7 @@ int bar(int arg){
 // CK4: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
 // CK4: [[IFTHEN]]
-// CK4-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK4-DAG: call void @__tgt_target_data_end_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK4-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK4-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK4-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
@@ -491,4 +491,23 @@ void test_close_modifier(int arg) {
   {++arg;}
 }
 #endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64
+// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7 --check-prefix CK7-64
+
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// RUN: %clang_cc1 -DCK7 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// SIMD-ONLY7-NOT: {{__kmpc|__tgt}}
+#ifdef CK7
+// CK7: test_device_ptr_addr
+void test_device_ptr_addr(int arg) {
+  int *p;
+  // CK7: add nsw i32
+  // CK7: add nsw i32
+  #pragma omp target data use_device_ptr(p) use_device_addr(arg)
+  { ++arg, ++(*p); }
+}
+#endif
 #endif
diff --git a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
index 4598c0e91220a..a73566ba09fd2 100644
--- a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
+++ b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
 // CHECK: store float* [[VLA_ADDR]], float** [[PTR4_VLA_ADDR]],
 // CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
 // CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
-// CHECK: call void @__tgt_target_data_begin(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0))
+// CHECK: call void @__tgt_target_data_begin_mapper(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0), i8** null)
 // CHECK: [[A_REF:%.+]] = load float*, float** [[BPTR0_A_ADDR]],
 // CHECK: [[REF_REF:%.+]] = load float*, float** [[BPTR2_REF_ADDR]],
 // CHECK: store float* [[REF_REF]], float** [[TMP_REF_ADDR:%.+]],
@@ -113,7 +113,7 @@ int main() {
 // CHECK: store float [[INC]], float* [[VLA0_ADDR]],
 // CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
 // CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
-// CHECK: call void @__tgt_target_data_end(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0))
+// CHECK: call void @__tgt_target_data_end_mapper(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0), i8** null)
 
 // CHECK: foo
 // %this.addr = alloca %struct.S*, align 8
@@ -187,7 +187,7 @@ int main() {
 // CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
 // CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
 // CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0
-// CHECK: call void @__tgt_target_data_begin(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0))
+// CHECK: call void @__tgt_target_data_begin_mapper(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0), i8** null)
 // CHECK: [[A_ADDR:%.+]] = load i32*, i32** [[BPTR1_A_ADDR]],
 // CHECK: store i32* [[A_ADDR]], i32** [[A_REF:%.+]],
 // CHECK: [[PTR_ADDR:%.+]] = load i32**, i32*** [[BPTR2_PTR_ADDR]],
@@ -219,6 +219,6 @@ int main() {
 // CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
 // CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
 // CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0
-// CHECK: call void @__tgt_target_data_end(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0))
+// CHECK: call void @__tgt_target_data_end_mapper(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0), i8** null)
 
 #endif
diff --git a/clang/test/OpenMP/target_defaultmap_codegen.cpp b/clang/test/OpenMP/target_defaultmap_codegen.cpp
index 3deff63273d53..f6119570974fc 100644
--- a/clang/test/OpenMP/target_defaultmap_codegen.cpp
+++ b/clang/test/OpenMP/target_defaultmap_codegen.cpp
@@ -30,7 +30,7 @@
 void implicit_maps_double_complex (int a){
   double _Complex dc = (double)a;
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -80,7 +80,7 @@ void implicit_maps_double_complex (int a){
 void implicit_maps_double_complex (int a){
   double _Complex dc = (double)a;
 
-  // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK2-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -130,7 +130,7 @@ void implicit_maps_double_complex (int a){
 void implicit_maps_double_complex (int a){
   double _Complex dc = (double)a;
 
-  // CK3-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK3-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -185,7 +185,7 @@ void implicit_maps_double_complex (int a){
 void implicit_maps_double (int a){
   double d = (double)a;
 
-  // CK4-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK4-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -252,7 +252,7 @@ void implicit_maps_double (int a){
 void implicit_maps_array (int a){
   double darr[2] = {(double)a, (double)a};
 
-  // CK5-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK5-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK5-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK5-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -303,7 +303,7 @@ void implicit_maps_array (int a){
 void implicit_maps_array (int a){
   double darr[2] = {(double)a, (double)a};
 
-  // CK6-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK6-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK6-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK6-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK6-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -354,7 +354,7 @@ void implicit_maps_array (int a){
 void implicit_maps_array (int a){
   double darr[2] = {(double)a, (double)a};
 
-  // CK7-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK7-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK7-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK7-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK7-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -405,7 +405,7 @@ void implicit_maps_array (int a){
 void implicit_maps_array (int a){
   double darr[2] = {(double)a, (double)a};
 
-  // CK8-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK8-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK8-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK8-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK8-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -459,7 +459,7 @@ void zero_size_section_and_private_maps (int ii){
   int pvtArr[10];
 
   // Region 09
-  // CK9-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK9-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK9-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK9-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -508,7 +508,7 @@ void zero_size_section_and_private_maps (int ii){
 void explicit_maps_single (){
   int *pa;
 
-  // CK10-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}})
+  // CK10-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE]]{{.+}}, i8** null)
   // CK10-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK10-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -556,7 +556,7 @@ void explicit_maps_single (){
 void explicit_maps_single (){
   int *pa;
 
-  // CK11-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK11-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK11-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK11-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -604,7 +604,7 @@ void explicit_maps_single (){
 void explicit_maps_single (){
   int *pa;
 
-  // CK12-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK12-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK12-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK12-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -652,7 +652,7 @@ void explicit_maps_single (){
 void explicit_maps_single (){
   int *pa;
 
-  // CK13-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK13-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK13-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK13-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -700,7 +700,7 @@ void explicit_maps_single (){
 void explicit_maps_single (){
   int *pa;
 
-  // CK14-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK14-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK14-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK14-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -750,7 +750,7 @@ void explicit_maps_single (){
 void implicit_maps_variable_length_array (int a){
   double vla[2][a];
 
-  // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
+  // CK15-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SS:%[^,]+]], i32 0, i32 0
@@ -835,7 +835,7 @@ class SSS {
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK16-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK16-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK16-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK16-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK16-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -892,7 +892,7 @@ class SSS {
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK17-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK17-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK17-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK17-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK17-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -949,7 +949,7 @@ class SSS {
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK18-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK18-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK18-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK18-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK18-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1006,7 +1006,7 @@ class SSS {
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK19-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK19-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK19-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1062,7 +1062,7 @@ void implicit_maps_struct (int a){
 void implicit_maps_double (int a){
   double d = (double)a;
 
-  // CK20-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK20-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK20-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK20-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1135,7 +1135,7 @@ class SSS {
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK21-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK21-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK21-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK21-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1186,7 +1186,7 @@ void implicit_maps_struct (int a){
 void implicit_maps_pointer (){
   double *ddyn;
 
-  // CK22-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK22-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK22-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK22-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1258,7 +1258,7 @@ void foo(float *&lr, T *&tr) {
   float *l;
   T *t;
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1275,7 +1275,7 @@ void foo(float *&lr, T *&tr) {
     ++g;
   }
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1292,7 +1292,7 @@ void foo(float *&lr, T *&tr) {
     ++l;
   }
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1309,7 +1309,7 @@ void foo(float *&lr, T *&tr) {
     ++t;
   }
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1327,7 +1327,7 @@ void foo(float *&lr, T *&tr) {
     ++lr;
   }
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1345,7 +1345,7 @@ void foo(float *&lr, T *&tr) {
     ++tr;
   }
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1363,7 +1363,7 @@ void foo(float *&lr, T *&tr) {
     ++tr;
   }
 
-  // CK23-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}})
+  // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}}, i8** null)
   // CK23-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK23-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1428,7 +1428,7 @@ void explicit_maps_single (int ii){
 
   // Close.
   // Region 00
-  // CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1447,7 +1447,7 @@ void explicit_maps_single (int ii){
 
   // Always Close.
   // Region 01
-  // CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1538,7 +1538,7 @@ void declare_target_link()
 #pragma omp target defaultmap(none:scalar) defaultmap(none:aggregate) defaultmap(none:pointer)
   {
 
-    // CK26-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+    // CK26-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
     // CK26-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
     // CK26-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
     // CK26-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
diff --git a/clang/test/OpenMP/target_depend_codegen.cpp b/clang/test/OpenMP/target_depend_codegen.cpp
index e8b07ace5fb05..97999eadf38c7 100644
--- a/clang/test/OpenMP/target_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_depend_codegen.cpp
@@ -121,7 +121,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2
@@ -178,7 +178,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -195,7 +195,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]])
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_device_codegen.cpp b/clang/test/OpenMP/target_device_codegen.cpp
index 8117540d39396..4da7677e4ce7d 100644
--- a/clang/test/OpenMP/target_device_codegen.cpp
+++ b/clang/test/OpenMP/target_device_codegen.cpp
@@ -18,7 +18,7 @@ void foo(int n) {
   // CHECK:       store i32 [[N]], i32* [[DEVICE_CAP:%.+]],
   // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -31,7 +31,7 @@ void foo(int n) {
   // CHECK:       store i32 [[N]], i32* [[DEVICE_CAP:%.+]],
   // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -40,9 +40,9 @@ void foo(int n) {
   // CHECK:       [[END]]
   #pragma omp target device(device_num: n)
   ;
-  // CHECK-NOT:   call i32 @__tgt_target(
+  // CHECK-NOT:   call i32 @__tgt_target_mapper(
   // CHECK:       call void @__omp_offloading_{{.+}}_l46()
-  // CHECK-NOT:   call i32 @__tgt_target(
+  // CHECK-NOT:   call i32 @__tgt_target_mapper(
   #pragma omp target device(ancestor: n)
   ;
 }
diff --git a/clang/test/OpenMP/target_enter_data_codegen.cpp b/clang/test/OpenMP/target_enter_data_codegen.cpp
index 1bb2f76207ffc..541ea57b17e5b 100644
--- a/clang/test/OpenMP/target_enter_data_codegen.cpp
+++ b/clang/test/OpenMP/target_enter_data_codegen.cpp
@@ -50,7 +50,7 @@ void foo(int arg) {
   float lb[arg];
 
   // Region 00
-  // CK1-DAG: call void @__tgt_target_data_begin_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_nowait_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
   // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -76,7 +76,7 @@ void foo(int arg) {
   // Region 02
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -100,7 +100,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 03
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -125,7 +125,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -155,7 +155,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 05
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -180,7 +180,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 06
-  // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -244,7 +244,7 @@ int bar(int arg){
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: call void @__tgt_target_data_begin_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -394,7 +394,7 @@ int bar(int arg){
 // Region 00
 // CK5: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK5: [[IFTHEN]]
-// CK5-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK5-DAG: call void @__tgt_target_data_begin_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK5-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK5-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
diff --git a/clang/test/OpenMP/target_enter_data_depend_codegen.cpp b/clang/test/OpenMP/target_enter_data_depend_codegen.cpp
index 83e4cf8a89601..72d7ab933e977 100644
--- a/clang/test/OpenMP/target_enter_data_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_enter_data_depend_codegen.cpp
@@ -64,7 +64,7 @@ void foo(int arg) {
   // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]],
   // CK1: [[DEV1:%.+]] = load i32, i32* %{{.+}}
   // CK1: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0
   // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
@@ -130,7 +130,7 @@ void foo(int arg) {
   // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1
   // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8
   // CK1: store i8 [[IF]], i8* [[IF_DEVICE]],
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
@@ -213,7 +213,7 @@ void foo(int arg) {
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
   // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[S]], i32 0, i32 0
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
@@ -298,7 +298,7 @@ void foo(int arg) {
   // CK1: store double* %{{.+}}, double** [[P1_BC]],
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|52}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{104|60}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
@@ -373,55 +373,61 @@ void foo(int arg) {
 }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_begin_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_begin_nowait_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
-
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1-NOT: __tgt_target_data_end
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
-
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [2 x i64]*, [2 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [2 x i8*]*, [2 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i64]** [[S_PRIV]], [2 x i8*]** [[M_PRIV]])
 // CK1-NOT: __tgt_target_data_end
 // CK1: ret i32 0
 // CK1: }
diff --git a/clang/test/OpenMP/target_exit_data_codegen.cpp b/clang/test/OpenMP/target_exit_data_codegen.cpp
index c045373b0e6f4..4413ce12a0363 100644
--- a/clang/test/OpenMP/target_exit_data_codegen.cpp
+++ b/clang/test/OpenMP/target_exit_data_codegen.cpp
@@ -51,7 +51,7 @@ void foo(int arg) {
 
   // Region 00
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_nowait_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
   // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -77,7 +77,7 @@ void foo(int arg) {
   // CK1-NOT: __tgt_target_data_begin
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -101,7 +101,7 @@ void foo(int arg) {
 
   // Region 03
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -126,7 +126,7 @@ void foo(int arg) {
 
   // Region 04
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -156,7 +156,7 @@ void foo(int arg) {
 
   // Region 05
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -181,7 +181,7 @@ void foo(int arg) {
 
   // Region 06
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -245,7 +245,7 @@ int bar(int arg){
 // CK2-NOT: __tgt_target_data_begin
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:.+]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: call void @__tgt_target_data_end_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:.+]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -349,7 +349,7 @@ int bar(int arg){
 // CK4-NOT: __tgt_target_data_begin
 // CK4: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK4: [[IFTHEN]]
-// CK4-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:.+]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK4-DAG: call void @__tgt_target_data_end_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:.+]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK4-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK4-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK4-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
diff --git a/clang/test/OpenMP/target_exit_data_depend_codegen.cpp b/clang/test/OpenMP/target_exit_data_depend_codegen.cpp
index f5dec5e6ea91f..72be1090ca1b2 100644
--- a/clang/test/OpenMP/target_exit_data_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_exit_data_depend_codegen.cpp
@@ -64,7 +64,7 @@ void foo(int arg) {
   // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]],
   // CK1: [[DEV1:%.+]] = load i32, i32* %{{.+}}
   // CK1: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0
   // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
@@ -130,7 +130,7 @@ void foo(int arg) {
   // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1
   // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8
   // CK1: store i8 [[IF]], i8* [[IF_DEVICE]],
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
@@ -213,7 +213,7 @@ void foo(int arg) {
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
   // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[S]], i32 0, i32 0
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
@@ -298,7 +298,7 @@ void foo(int arg) {
   // CK1: store double* %{{.+}}, double** [[P1_BC]],
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|52}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{104|60}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
@@ -373,56 +373,62 @@ void foo(int arg) {
 }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_end_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_end_nowait_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_end_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
-
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
-// CK1-NOT: __tgt_target_data_end
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
+// CK1-NOT: __tgt_target_data_end_mapper
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_end_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
-
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [2 x i64]*, [2 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i64]** [[S_PRIV]])
-// CK1-NOT: __tgt_target_data_end
+// CK1-DAG: [[M]] = load [2 x i8*]*, [2 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i64]** [[S_PRIV]], [2 x i8*]** [[M_PRIV]])
+// CK1-NOT: __tgt_target_data_end_mapper
 // CK1: ret i32 0
 // CK1: }
 
diff --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp
index 895bff64b7270..e4a1a0302411d 100644
--- a/clang/test/OpenMP/target_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp
@@ -130,7 +130,7 @@ int foo(int n, double *ptr) {
   // CHECK:  store i32* [[P_PTR]], i32** [[PCAST_TOPTR2]],
   // CHECK:  [[BASE_PTR_GEP_ARG:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK:  {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 2, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0))
+  // CHECK:  {{.+}} = call i32 @__tgt_target_mapper(i64 -1, {{.+}}, i32 2, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** null)
 
   // TCHECK:  define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i32** nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]])
   // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
@@ -254,7 +254,7 @@ int foo(int n, double *ptr) {
   // CHECK:  [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[SIZES_ARG2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[SIZET2]],  i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT2]], i32 0, i32 0))
+  // CHECK: {{.+}} = call i32 @__tgt_target_mapper(i64 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT2]], i32 0, i32 0), i8** null)
 
   // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
   // target region
@@ -353,7 +353,7 @@ int foo(int n, double *ptr) {
 
   // CHECK:  [[BASE_PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 2, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0))
+  // CHECK: {{.+}} = call i32 @__tgt_target_mapper(i64 -1, {{.+}}, i32 2, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null)
 
   // TCHECK:  define weak void @__omp_offloading_{{.+}}(double* [[PTR_IN:%.+]], [[TTII]]* nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]])
   // TCHECK-NOT: alloca [[TTII]],
@@ -500,7 +500,7 @@ struct S1 {
   // CHECK:  store i{{[0-9]+}} [[B_SIZE:%.+]], i{{[0-9]+}}* [[SIZES_GEP4_4]],
 
   // only check that we use the map types stored in the global variable
-  // CHECK:  call i32 @__tgt_target(i64 -1, {{.+}}, i32 6, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT4]], i32 0, i32 0))
+  // CHECK:  call i32 @__tgt_target_mapper(i64 -1, {{.+}}, i32 6, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT4]], i32 0, i32 0), i8** null)
 
   // TCHECK: define weak void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]])
   // TCHECK:  [[TH_ADDR:%.+]] = alloca [[S1]]*,
@@ -572,7 +572,7 @@ struct S1 {
   // CHECK:  store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
 
   // only check that the right sizes and map types are used
-  // CHECK:  call i32 @__tgt_target(i64 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0))
+  // CHECK:  call i32 @__tgt_target_mapper(i64 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null)
 };
 
 int bar(int n, double *ptr) {
@@ -608,7 +608,7 @@ int bar(int n, double *ptr) {
 // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]**
 // CHECK:  store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
 
-// CHECK:  call i32 @__tgt_target(i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0))
+// CHECK:  call i32 @__tgt_target_mapper(i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0), i8** null)
 
 // TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
 // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
diff --git a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp
index 90514acadf815..7c2eef577f9f3 100644
--- a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp
+++ b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp
@@ -49,7 +49,7 @@ void foo(float *&lr, T *&tr) {
   float *l;
   T *t;
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -66,7 +66,7 @@ void foo(float *&lr, T *&tr) {
     ++g;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -83,7 +83,7 @@ void foo(float *&lr, T *&tr) {
     ++l;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -100,7 +100,7 @@ void foo(float *&lr, T *&tr) {
     ++t;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -118,7 +118,7 @@ void foo(float *&lr, T *&tr) {
     ++lr;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -136,7 +136,7 @@ void foo(float *&lr, T *&tr) {
     ++tr;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -154,7 +154,7 @@ void foo(float *&lr, T *&tr) {
     ++tr;
   }
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -231,7 +231,7 @@ struct ST {
   void foo(double *&arg) {
     int *la = 0;
 
-    // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK2-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
     // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -246,7 +246,7 @@ struct ST {
       a++;
     }
 
-    // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK2-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
     // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -261,7 +261,7 @@ struct ST {
       b++;
     }
 
-    // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK2-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
     // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
diff --git a/clang/test/OpenMP/target_map_codegen.cpp b/clang/test/OpenMP/target_map_codegen.cpp
index 92e0224a2de3b..2eab004eeff2e 100644
--- a/clang/test/OpenMP/target_map_codegen.cpp
+++ b/clang/test/OpenMP/target_map_codegen.cpp
@@ -51,7 +51,7 @@ void implicit_maps_integer (int a){
   B::modify(a);
   int i = a;
 
-  // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK1-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -111,7 +111,7 @@ void implicit_maps_integer (int a){
 // CK2-LABEL: implicit_maps_reference{{.*}}(
 void implicit_maps_reference (int a, int *b){
   int &i = a;
-  // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK2-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -131,7 +131,7 @@ void implicit_maps_reference (int a, int *b){
   }
 
   int *&p = b;
-  // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}})
+  // CK2-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}}, i8** null)
   // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -197,7 +197,7 @@ void implicit_maps_reference (int a, int *b){
 // CK3-LABEL: implicit_maps_parameter{{.*}}(
 void implicit_maps_parameter (int a){
 
-  // CK3-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK3-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -259,7 +259,7 @@ void implicit_maps_nested_integer (int a){
   // CK4: define internal void [[KERNELP1]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}})
   #pragma omp parallel
   {
-    // CK4-DAG: call i32 @__tgt_target_teams(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i32 1, i32 0)
+    // CK4-DAG: call i32 @__tgt_target_teams_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null, i32 1, i32 0)
     // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
     // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
     // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -323,7 +323,7 @@ void implicit_maps_nested_integer_and_enum (int a){
   // Using an enum should not change the mapping information.
   int  i = a;
 
-  // CK5-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK5-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK5-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK5-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -378,7 +378,7 @@ void implicit_maps_nested_integer_and_enum (int a){
 // CK6-LABEL: implicit_maps_host_global{{.*}}(
 int Gi;
 void implicit_maps_host_global (int a){
-  // CK6-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK6-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK6-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK6-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK6-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -440,7 +440,7 @@ void implicit_maps_host_global (int a){
 void implicit_maps_double (int a){
   double d = (double)a;
 
-  // CK7-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK7-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK7-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK7-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK7-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -507,7 +507,7 @@ void implicit_maps_double (int a){
 void implicit_maps_float (int a){
   float f = (float)a;
 
-  // CK8-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK8-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK8-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK8-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK8-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -561,7 +561,7 @@ void implicit_maps_float (int a){
 void implicit_maps_array (int a){
   double darr[2] = {(double)a, (double)a};
 
-  // CK9-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK9-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK9-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK9-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK9-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -612,7 +612,7 @@ void implicit_maps_array (int a){
 void implicit_maps_pointer (){
   double *ddyn;
 
-  // CK10-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK10-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK10-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK10-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK10-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -664,7 +664,7 @@ void implicit_maps_pointer (){
 void implicit_maps_double_complex (int a, int *b){
   double _Complex dc = (double)a;
 
-  // CK11-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK11-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK11-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK11-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK11-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -719,7 +719,7 @@ void implicit_maps_double_complex (int a, int *b){
 void implicit_maps_float_complex (int a){
   float _Complex fc = (float)a;
 
-  // CK12-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK12-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK12-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK12-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -788,7 +788,7 @@ void implicit_maps_float_complex (int a){
 void implicit_maps_variable_length_array (int a){
   double vla[2][a];
 
-  // CK13-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
+  // CK13-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK13-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK13-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK13-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SS:%[^,]+]], i32 0, i32 0
@@ -889,7 +889,7 @@ void implicit_maps_class (int a){
   SSS sss(a, (double)a);
 
   // CK14: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
-  // CK14-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 4, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZES:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
+  // CK14-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 4, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZES:%[^,]+]], {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK14-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK14-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK14-DAG: [[SIZES]] = getelementptr inbounds {{.+}}[[S:%[^,]+]], i32 0, i32 0
@@ -1009,7 +1009,7 @@ void implicit_maps_templated_class (int a){
   SSST<123> ssst(a, (double)a);
 
   // CK15: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
-  // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 4, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZES:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
+  // CK15-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 4, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZES:%[^,]+]], {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[SIZES]] = getelementptr inbounds {{.+}}[[S:%[^,]+]], i32 0, i32 0
@@ -1057,7 +1057,7 @@ void implicit_maps_templated_class (int a){
   ssst.foo(456);
 
   // CK15: define {{.*}}void @{{.+}}bar{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
-  // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 4, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZES:[^,]+]], {{.+}}[[TYPES2]]{{.+}})
+  // CK15-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 4, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZES:[^,]+]], {{.+}}[[TYPES2]]{{.+}}, i8** null)
   // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK15-DAG: [[SIZES]] = getelementptr inbounds {{.+}}[[S:%[^,]+]], i32 0, i32 0
@@ -1164,7 +1164,7 @@ void implicit_maps_templated_function (int a){
   int i = a;
 
   // CK16: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}})
-  // CK16-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK16-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK16-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK16-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -1222,7 +1222,7 @@ class SSS {
 void implicit_maps_struct (int a){
   SSS s = {a, (double)a};
 
-  // CK17-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK17-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK17-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK17-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK17-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -1281,7 +1281,7 @@ void implicit_maps_template_type_capture (int a){
   int i = a;
 
   // CK18: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}})
-  // CK18-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK18-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null)
   // CK18-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK18-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
 
@@ -1307,12 +1307,26 @@ void implicit_maps_template_type_capture (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s --check-prefix CK19 --check-prefix CK19-64
+// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s --check-prefixes=CK19,CK19-64,CK19-USE
+// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK19,CK19-64,CK19-USE
+// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK19,CK19-32,CK19-USE
+// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK19,CK19-32,CK19-USE
+
+// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
+// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
+// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
+// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
+
+// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s --check-prefixes=CK19,CK19-64,CK19-NOUSE
 // RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefix CK19 --check-prefix CK19-64
-// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefix CK19 --check-prefix CK19-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK19,CK19-64,CK19-NOUSE
+// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK19,CK19-32,CK19-NOUSE
 // RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefix CK19 --check-prefix CK19-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK19,CK19-32,CK19-NOUSE
 
 // RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
 // RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
@@ -1320,6 +1334,8 @@ void implicit_maps_template_type_capture (int a){
 // RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
 // RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY18 %s
+
+
 // SIMD-ONLY18-NOT: {{__kmpc|__tgt}}
 #ifdef CK19
 
@@ -1388,29 +1404,40 @@ void implicit_maps_template_type_capture (int a){
 // CK19: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33]
+// CK19-USE: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33]
+// CK19-NOUSE: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[SIZE17:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240]
-// CK19: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 34]
+// CK19-USE: [[SIZE17:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240]
+// CK19-USE: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 34]
+// CK19-NOUSE: [[SIZE17:@.+]] = private {{.*}}constant [1 x i64] [i64 240]
+// CK19-NOUSE: [[MTYPE17:@.+]] = private {{.*}}constant [1 x i64] [i64 34]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[SIZE18:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240]
-// CK19: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
+// CK19-USE: [[SIZE18:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240]
+// CK19-USE: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
+// CK19-NOUSE: [[SIZE18:@.+]] = private {{.*}}constant [1 x i64] [i64 240]
+// CK19-NOUSE: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 32]
+// CK19-USE: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 32]
+// CK19-NOUSE: [[MTYPE19:@.+]] = private {{.*}}constant [1 x i64] [i64 32]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[SIZE20:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4]
-// CK19: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33]
+// CK19-USE: [[SIZE20:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4]
+// CK19-USE: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33]
+// CK19-NOUSE: [[SIZE20:@.+]] = private {{.*}}constant [1 x i64] [i64 4]
+// CK19-NOUSE: [[MTYPE20:@.+]] = private {{.*}}constant [1 x i64] [i64 33]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
+// CK19-USE: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
+// CK19-NOUSE: [[MTYPE21:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[SIZE22:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4]
-// CK19: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
+// CK19-USE: [[SIZE22:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4]
+// CK19-USE: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
+// CK19-NOUSE: [[SIZE22:@.+]] = private {{.*}}constant [1 x i64] [i64 4]
+// CK19-NOUSE: [[MTYPE22:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
 // CK19: [[SIZE23:@.+]] = private {{.*}}constant [1 x i64] [i64 4]
@@ -1441,11 +1468,14 @@ void implicit_maps_template_type_capture (int a){
 // CK19: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 16, i64 19]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35]
+// CK19-USE: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[MTYPE30:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[SIZE31:@.+]] = private {{.*}}constant [4 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 {{8|4}}, i64 40]
-// CK19: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35]
+// CK19-USE: [[SIZE31:@.+]] = private {{.*}}constant [4 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 {{8|4}}, i64 40]
+// CK19-USE: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[SIZE31:@.+]] = private {{.*}}constant [1 x i64] [i64 40]
+// CK19-NOUSE: [[MTYPE31:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
 // CK19: [[SIZE32:@.+]] = private {{.*}}constant [1 x i64] [i64 13728]
@@ -1467,20 +1497,26 @@ void implicit_maps_template_type_capture (int a){
 // CK19: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-USE: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[MTYPE37:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-USE: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[MTYPE38:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-USE: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[MTYPE39:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-USE: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[MTYPE40:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
-// CK19: [[SIZE41:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 208]
-// CK19: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-USE: [[SIZE41:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 208]
+// CK19-USE: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
+// CK19-NOUSE: [[SIZE41:@.+]] = private {{.*}}constant [1 x i64] [i64 208]
+// CK19-NOUSE: [[MTYPE41:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
 // CK19: [[SIZE42:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 104]
@@ -1499,7 +1535,7 @@ void explicit_maps_single (int ii){
   int a = ii;
 
   // Region 00
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1510,17 +1546,20 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
   // CK19-DAG: store i32* [[VAR0]], i32** [[CP0]]
 
-  // CK19: call void [[CALL00:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL00:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL00:@.+]]()
   #pragma omp target map(alloc:a)
   {
+#ifdef USE
     ++a;
+#endif
   }
 
   // Map of a scalar in nested region.
   int b = a;
 
   // Region 00n
-  // CK19-DAG: call i32 @__tgt_target_teams(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}}, i32 1, i32 0)
+  // CK19-DAG: call i32 @__tgt_target_teams_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}}, i8** null, i32 1, i32 0)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1531,18 +1570,21 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
   // CK19-DAG: store i32* [[VAR0]], i32** [[CP0]]
 
-  // CK19: call void [[CALL00n:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL00n:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL00n:@.+]]()
   #pragma omp target map(alloc:b)
   #pragma omp parallel
   {
+#ifdef USE
     ++b;
+#endif
   }
 
   // Map of an array.
   int arra[100];
 
   // Region 01
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1553,14 +1595,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store [100 x i32]* [[VAR0:%.+]], [100 x i32]** [[CBP0]]
   // CK19-DAG: store [100 x i32]* [[VAR0]], [100 x i32]** [[CP0]]
 
-  // CK19: call void [[CALL01:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL01:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL01:@.+]]()
   #pragma omp target map(to:arra)
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
   // Region 02
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1572,14 +1617,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 20
 
-  // CK19: call void [[CALL02:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL02:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL02:@.+]]()
   #pragma omp target map(from:arra[20:60])
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
   // Region 03
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1591,14 +1639,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 0
 
-  // CK19: call void [[CALL03:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL03:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL03:@.+]]()
   #pragma omp target map(tofrom:arra[:60])
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
   // Region 04
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1610,14 +1661,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 0
 
-  // CK19: call void [[CALL04:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL04:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL04:@.+]]()
   #pragma omp target map(alloc:arra[:])
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
   // Region 05
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1629,14 +1683,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 15
 
-  // CK19: call void [[CALL05:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL05:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL05:@.+]]()
   #pragma omp target map(to:arra[15])
   {
+#ifdef USE
     arra[15]++;
+#endif
   }
 
   // Region 06
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1652,14 +1709,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[CSVAL0]] = {{mul nuw i.+ %.*, 4|sext i32 .+ to i64}}
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} %{{.*}}
 
-  // CK19: call void [[CALL06:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL06:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL06:@.+]]()
   #pragma omp target map(tofrom:arra[ii:ii+23])
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
   // Region 07
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1675,14 +1735,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[CSVAL0]] = {{mul nuw i.+ %.*, 4|sext i32 .+ to i64}}
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 0
 
-  // CK19: call void [[CALL07:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL07:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL07:@.+]]()
   #pragma omp target map(alloc:arra[:ii])
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
   // Region 08
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1694,17 +1757,20 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} %{{.*}}
 
-  // CK19: call void [[CALL08:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL08:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL08:@.+]]()
   #pragma omp target map(tofrom:arra[ii])
   {
+#ifdef USE
     arra[15]++;
+#endif
   }
 
   // Map of a pointer.
   int *pa;
 
   // Region 09
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1715,14 +1781,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32** [[VAR0:%.+]], i32*** [[CBP0]]
   // CK19-DAG: store i32** [[VAR0]], i32*** [[CP0]]
 
-  // CK19: call void [[CALL09:@.+]](i32** {{[^,]+}})
+  // CK19-USE: call void [[CALL09:@.+]](i32** {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL09:@.+]]()
   #pragma omp target map(from:pa)
   {
+#ifdef USE
     pa[50]++;
+#endif
   }
 
   // Region 10
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1736,14 +1805,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 20
   // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-  // CK19: call void [[CALL10:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL10:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL10:@.+]]()
   #pragma omp target map(tofrom:pa[20:60])
   {
+#ifdef USE
     pa[50]++;
+#endif
   }
 
   // Region 11
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1757,14 +1829,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 0
   // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-  // CK19: call void [[CALL11:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL11:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL11:@.+]]()
   #pragma omp target map(alloc:pa[:60])
   {
+#ifdef USE
     pa[50]++;
+#endif
   }
 
   // Region 12
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1778,14 +1853,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 15
   // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-  // CK19: call void [[CALL12:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL12:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL12:@.+]]()
   #pragma omp target map(to:pa[15])
   {
+#ifdef USE
     pa[15]++;
+#endif
   }
 
   // Region 13
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1803,14 +1881,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} %{{.*}}
   // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-  // CK19: call void [[CALL13:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL13:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL13:@.+]]()
   #pragma omp target map(alloc:pa[ii-23:ii])
   {
+#ifdef USE
     pa[50]++;
+#endif
   }
 
   // Region 14
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1828,14 +1909,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 0
   // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-  // CK19: call void [[CALL14:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL14:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL14:@.+]]()
   #pragma omp target map(to:pa[:ii])
   {
+#ifdef USE
     pa[50]++;
+#endif
   }
 
   // Region 15
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1849,217 +1933,305 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} %{{.*}}
   // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-  // CK19: call void [[CALL15:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL15:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL15:@.+]]()
   #pragma omp target map(from:pa[ii+12])
   {
+#ifdef USE
     pa[15]++;
+#endif
   }
 
   // Map of a variable-size array.
   int va[ii];
 
   // Region 16
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE16]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z:64|32]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[VAR1]], i32** [[CP1]]
-  // CK19-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]]
-  // CK19-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
-
-  // CK19: call void [[CALL16:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z:64|32]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[VAR1]], i32** [[CP1]]
+  // CK19-USE-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]]
+  // CK19-USE-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[VAR0]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: store i{{.+}} [[CSVAL0:%[^,]+]], i{{.+}}* [[S0]]
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
+
+  // CK19-USE: call void [[CALL16:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL16:@.+]]()
   #pragma omp target map(to:va)
   {
+#ifdef USE
    va[50]++;
+#endif
   }
 
   // Region 17
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE17]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
-  // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 20
-
-  // CK19: call void [[CALL17:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
+  // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 20
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 20
+
+  // CK19-USE: call void [[CALL17:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL17:@.+]]()
   #pragma omp target map(from:va[20:60])
   {
+#ifdef USE
    va[50]++;
+#endif
   }
 
   // Region 18
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE18]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
-  // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0
-
-  // CK19: call void [[CALL18:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
+  // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 0
+
+  // CK19-USE: call void [[CALL18:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL18:@.+]]()
   #pragma omp target map(tofrom:va[:60])
   {
+#ifdef USE
    va[50]++;
+#endif
   }
 
   // Region 19
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE19]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
-  // CK19-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]]
-  // CK19-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
-  // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0
-
-  // CK19: call void [[CALL19:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
+  // CK19-USE-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]]
+  // CK19-USE-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
+  // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: store i{{.+}} [[CSVAL0:%[^,]+]], i{{.+}}* [[S0]]
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 0
+
+  // CK19-USE: call void [[CALL19:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL19:@.+]]()
   #pragma omp target map(alloc:va[:])
   {
+#ifdef USE
    va[50]++;
+#endif
   }
 
   // Region 20
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE20]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
-  // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 15
-
-  // CK19: call void [[CALL20:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
+  // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 15
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 15
+
+  // CK19-USE: call void [[CALL20:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL20:@.+]]()
   #pragma omp target map(to:va[15])
   {
+#ifdef USE
    va[15]++;
+#endif
   }
 
   // Region 21
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE21]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
-  // CK19-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]]
-  // CK19-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
-  // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}}
-
-  // CK19: call void [[CALL21:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
+  // CK19-USE-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]]
+  // CK19-USE-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
+  // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: store i{{.+}} [[CSVAL0:%[^,]+]], i{{.+}}* [[S0]]
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}}
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} %{{.+}}
+
+  // CK19-USE: call void [[CALL21:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL21:@.+]]()
   #pragma omp target map(tofrom:va[ii:ii+23])
   {
+#ifdef USE
    va[50]++;
+#endif
   }
 
   // Region 22
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE22]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
-  // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
-  // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
-  // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}}
-
-  // CK19: call void [[CALL22:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
+
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32**
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
+  // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]]
+  // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]]
+  // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
+  // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
+  // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} %{{.+}}
+
+  // CK19-USE: call void [[CALL22:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL22:@.+]]()
   #pragma omp target map(tofrom:va[ii])
   {
+#ifdef USE
    va[15]++;
+#endif
   }
 
   // Always.
   // Region 23
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE23]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE23]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2070,10 +2242,13 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
   // CK19-DAG: store i32* [[VAR0]], i32** [[CP0]]
 
-  // CK19: call void [[CALL23:@.+]](i32* {{[^,]+}})
+  // CK19-USE: call void [[CALL23:@.+]](i32* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL23:@.+]]()
   #pragma omp target map(always, tofrom: a)
   {
+#ifdef USE
    a++;
+#endif
   }
 
   // Multidimensional arrays.
@@ -2081,7 +2256,7 @@ void explicit_maps_single (int ii){
   int ***mptr;
 
   // Region 24
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE24]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE24]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2092,14 +2267,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store [4 x [5 x [6 x i32]]]* [[VAR0:%.+]], [4 x [5 x [6 x i32]]]** [[CBP0]]
   // CK19-DAG: store [4 x [5 x [6 x i32]]]* [[VAR0]], [4 x [5 x [6 x i32]]]** [[CP0]]
 
-  // CK19: call void [[CALL24:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL24:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL24:@.+]]()
   #pragma omp target map(tofrom: marr)
   {
+#ifdef USE
    marr[1][2][3]++;
+#endif
   }
 
   // Region 25
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE25]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE25]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE25]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE25]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2113,14 +2291,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC00]] = getelementptr {{.*}}[5 x [6 x i32]]* [[SEC000:[^,]+]], i{{.+}} 0, i{{.+}} 2
   // CK19-DAG: [[SEC000]] = getelementptr {{.*}}[4 x [5 x [6 x i32]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1
 
-  // CK19: call void [[CALL25:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL25:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL25:@.+]]()
   #pragma omp target map(tofrom: marr[1][2][2:4])
   {
+#ifdef USE
    marr[1][2][3]++;
+#endif
   }
 
   // Region 26
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE26]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE26]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE26]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE26]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2134,14 +2315,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC00]] = getelementptr {{.*}}[5 x [6 x i32]]* [[SEC000:[^,]+]], i{{.+}} 0, i{{.+}} 2
   // CK19-DAG: [[SEC000]] = getelementptr {{.*}}[4 x [5 x [6 x i32]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1
 
-  // CK19: call void [[CALL26:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL26:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL26:@.+]]()
   #pragma omp target map(tofrom: marr[1][2][:])
   {
+#ifdef USE
    marr[1][2][3]++;
+#endif
   }
 
   // Region 27
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE27]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE27]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE27]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE27]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2155,14 +2339,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC00]] = getelementptr {{.*}}[5 x [6 x i32]]* [[SEC000:[^,]+]], i{{.+}} 0, i{{.+}} 2
   // CK19-DAG: [[SEC000]] = getelementptr {{.*}}[4 x [5 x [6 x i32]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1
 
-  // CK19: call void [[CALL27:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL27:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL27:@.+]]()
   #pragma omp target map(tofrom: marr[1][2][3])
   {
+#ifdef USE
    marr[1][2][3]++;
+#endif
   }
 
   // Region 28
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE28]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE28]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE28]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE28]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2200,14 +2387,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC22222]] = getelementptr {{.*}}i32*** [[SEC222222:[^,]+]], i{{.+}} 1
   // CK19-DAG: [[SEC222222]] = load i32***, i32**** [[PTR]],
 
-  // CK19: call void [[CALL28:@.+]](i32*** {{[^,]+}})
+  // CK19-USE: call void [[CALL28:@.+]](i32*** {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL28:@.+]]()
   #pragma omp target map(tofrom: mptr[1][2][2:4])
   {
+#ifdef USE
     mptr[1][2][3]++;
+#endif
   }
 
   // Region 29
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE29]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE29]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE29]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE29]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2245,110 +2435,141 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC22222]] = getelementptr {{.*}}i32*** [[SEC222222:[^,]+]], i{{.+}} 1
   // CK19-DAG: [[SEC222222]] = load i32***, i32**** [[PTR]],
 
-  // CK19: call void [[CALL29:@.+]](i32*** {{[^,]+}})
+  // CK19-USE: call void [[CALL29:@.+]](i32*** {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL29:@.+]]()
   #pragma omp target map(tofrom: mptr[1][2][3])
   {
+#ifdef USE
     mptr[1][2][3]++;
+#endif
   }
 
   // Multidimensional VLA.
   double mva[23][ii][ii+5];
 
   // Region 30
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE30]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|4}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|4}} x i{{.+}}]* [[MTYPE30]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S1]]
-  // CK19-64-DAG: [[VAR1]] = zext i32 %{{[^,]+}} to i64
-  // CK19-64-DAG: [[VAR11]] = zext i32 %{{[^,]+}} to i64
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
+  // CK19-64-USE-DAG: [[VAR1]] = zext i32 %{{[^,]+}} to i64
+  // CK19-64-USE-DAG: [[VAR11]] = zext i32 %{{[^,]+}} to i64
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]*
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]]
-  // CK19-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S2]]
-  // CK19-64-DAG: [[VAR2]] = zext i32 %{{[^,]+}} to i64
-  // CK19-64-DAG: [[VAR22]] = zext i32 %{{[^,]+}} to i64
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S2]]
+  // CK19-64-USE-DAG: [[VAR2]] = zext i32 %{{[^,]+}} to i64
+  // CK19-64-USE-DAG: [[VAR22]] = zext i32 %{{[^,]+}} to i64
   //
-  // CK19-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
-  // CK19-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-  // CK19-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
-  // CK19-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double**
-  // CK19-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double**
-  // CK19-DAG: store double* [[VAR3:%.+]], double** [[CBP3]]
-  // CK19-DAG: store double* [[VAR3]], double** [[CP3]]
-  // CK19-DAG: store i64 [[CSVAL3:%[^,]+]], i64* [[S3]]
-  // CK19-DAG: [[CSVAL3]] = {{mul nuw i64 %[^,]+, 8|sext i32 .+ to i64}}
-
-  // CK19: call void [[CALL30:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
+  // CK19-USE-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
+  // CK19-USE-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
+  // CK19-USE-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double**
+  // CK19-USE-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double**
+  // CK19-USE-DAG: store double* [[VAR3:%.+]], double** [[CBP3]]
+  // CK19-USE-DAG: store double* [[VAR3]], double** [[CP3]]
+  // CK19-USE-DAG: store i64 [[CSVAL3:%[^,]+]], i64* [[S3]]
+  // CK19-USE-DAG: [[CSVAL3]] = {{mul nuw i64 %[^,]+, 8|sext i32 .+ to i64}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to double**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double**
+  // CK19-NOUSE-DAG: store double* [[VAR0:%.+]], double** [[CBP0]]
+  // CK19-NOUSE-DAG: store double* [[VAR0]], double** [[CP0]]
+  // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 8|sext i32 .+ to i64}}
+
+  // CK19-USE: call void [[CALL30:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL30:@.+]]()
   #pragma omp target map(tofrom: mva)
   {
+#ifdef USE
     mva[1][2][3]++;
+#endif
   }
 
   // Region 31
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE31]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|4}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|4}} x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[{{1|4}} x i{{.+}}]* [[MTYPE31]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]*
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]]
-  // CK19-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]]
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]]
   //
-  // CK19-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
-  // CK19-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-  // CK19-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double**
-  // CK19-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double**
-  // CK19-DAG: store double* [[VAR3:%.+]], double** [[CBP3]]
-  // CK19-DAG: store double* [[SEC3:%.+]], double** [[CP3]]
-  // CK19-DAG: [[SEC3]] = getelementptr {{.*}}double* [[SEC33:%.+]], i[[Z]] 0
-  // CK19-DAG: [[SEC33]] = getelementptr {{.*}}double* [[SEC333:%.+]], i[[Z]] [[IDX3:%.+]]
-  // CK19-DAG: [[IDX3]] = mul nsw i[[Z]] %{{[^,]+}}, %{{[^,]+}}
-  // CK19-DAG: [[SEC333]] = getelementptr {{.*}}double* [[VAR3]], i[[Z]] [[IDX33:%.+]]
-  // CK19-DAG: [[IDX33]] = mul nsw i[[Z]] 1, %{{[^,]+}}
-
-  // CK19: call void [[CALL31:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
+  // CK19-USE-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
+  // CK19-USE-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double**
+  // CK19-USE-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double**
+  // CK19-USE-DAG: store double* [[VAR3:%.+]], double** [[CBP3]]
+  // CK19-USE-DAG: store double* [[SEC3:%.+]], double** [[CP3]]
+  // CK19-USE-DAG: [[SEC3]] = getelementptr {{.*}}double* [[SEC33:%.+]], i[[Z]] 0
+  // CK19-USE-DAG: [[SEC33]] = getelementptr {{.*}}double* [[SEC333:%.+]], i[[Z]] [[IDX3:%.+]]
+  // CK19-USE-DAG: [[IDX3]] = mul nsw i[[Z]] %{{[^,]+}}, %{{[^,]+}}
+  // CK19-USE-DAG: [[SEC333]] = getelementptr {{.*}}double* [[VAR3]], i[[Z]] [[IDX33:%.+]]
+  // CK19-USE-DAG: [[IDX33]] = mul nsw i[[Z]] 1, %{{[^,]+}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to double**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double**
+  // CK19-NOUSE-DAG: store double* [[VAR0:%.+]], double** [[CBP0]]
+  // CK19-NOUSE-DAG: store double* [[SEC0:%.+]], double** [[CP0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}double* [[SEC00:%.+]], i[[Z:64|32]] 0
+  // CK19-NOUSE-DAG: [[SEC00]] = getelementptr {{.*}}double* [[SEC000:%.+]], i[[Z]] [[IDX0:%.+]]
+  // CK19-NOUSE-DAG: [[IDX0]] = mul nsw i[[Z]] %{{[^,]+}}, %{{[^,]+}}
+  // CK19-NOUSE-DAG: [[SEC000]] = getelementptr {{.*}}double* [[VAR0]], i[[Z]] [[IDX00:%.+]]
+  // CK19-NOUSE-DAG: [[IDX00]] = mul nsw i[[Z]] 1, %{{[^,]+}}
+
+  // CK19-USE: call void [[CALL31:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL31:@.+]]()
   #pragma omp target map(tofrom: mva[1][ii-2][:5])
   {
+#ifdef USE
     mva[1][2][3]++;
+#endif
   }
 
   // Multidimensional array sections.
@@ -2357,7 +2578,7 @@ void explicit_maps_single (int ii){
   double ***mptras;
 
   // Region 32
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE32]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE32]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE32]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE32]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2368,14 +2589,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store [11 x [12 x [13 x double]]]* [[VAR0:%.+]], [11 x [12 x [13 x double]]]** [[CBP0]]
   // CK19-DAG: store [11 x [12 x [13 x double]]]* [[VAR0]], [11 x [12 x [13 x double]]]** [[CP0]]
 
-  // CK19: call void [[CALL32:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL32:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL32:@.+]]()
   #pragma omp target map(marras)
   {
+#ifdef USE
     marras[1][2][3]++;
+#endif
   }
 
   // Region 33
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE33]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE33]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE33]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE33]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2387,14 +2611,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store [12 x [13 x double]]* [[SEC0:%.+]], [12 x [13 x double]]** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 0
 
-  // CK19: call void [[CALL33:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL33:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL33:@.+]]()
   #pragma omp target map(marras[:])
   {
+#ifdef USE
     marras[1][2][3]++;
+#endif
   }
 
   // Region 34
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE34]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE34]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE34]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE34]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2406,14 +2633,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store [12 x [13 x double]]* [[SEC0:%.+]], [12 x [13 x double]]** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 0
 
-  // CK19: call void [[CALL34:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL34:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL34:@.+]]()
   #pragma omp target map(marras[:][:][:])
   {
+#ifdef USE
     marras[1][2][3]++;
+#endif
   }
 
   // Region 35
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE35]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE35]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2431,14 +2661,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC00]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 1
   // CK19-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
 
-  // CK19: call void [[CALL35:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL35:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL35:@.+]]()
   #pragma omp target map(marras[1][:ii][:])
   {
+#ifdef USE
     marras[1][2][3]++;
+#endif
   }
 
   // Region 36
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE36]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE36]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE36]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE36]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2452,215 +2685,289 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC00]] = getelementptr {{.+}}[12 x [13 x double]]* [[SEC000:%[^,]+]], i{{.+}} 0, i{{.+}} 0
   // CK19-DAG: [[SEC000]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 0
 
-  // CK19: call void [[CALL36:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL36:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL36:@.+]]()
   #pragma omp target map(marras[:1][:2][:13])
   {
+#ifdef USE
     marras[1][2][3]++;
+#endif
   }
 
   // Region 37
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE37]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE37]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S1]]
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
-  // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
-  // CK19-DAG: store [13 x double]* [[VAR2]], [13 x double]** [[CP2]]
-  // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
-  // CK19-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
-
-  // CK19: call void [[CALL37:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
+  // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
+  // CK19-USE-DAG: store [13 x double]* [[VAR2]], [13 x double]** [[CP2]]
+  // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
+  // CK19-USE-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]**
+  // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]]
+  // CK19-NOUSE-DAG: store [13 x double]* [[VAR0]], [13 x double]** [[CP0]]
+  // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
+
+  // CK19-USE: call void [[CALL37:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL37:@.+]]()
   #pragma omp target map(mvlaas)
   {
+#ifdef USE
     mvlaas[1][2][3]++;
+#endif
   }
 
   // Region 38
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE38]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE38]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S1]]
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
-  // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
-  // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
-  // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
-  // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]]
-  // CK19-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}}
-  // CK19-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
-
-  // CK19: call void [[CALL38:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
+  // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
+  // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
+  // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
+  // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]]
+  // CK19-USE-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}}
+  // CK19-USE-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]**
+  // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]]
+  // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]]
+  // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC00:%[^,]+]]
+  // CK19-NOUSE-DAG: [[SEC00]] = mul nsw i[[Z]] 0, %{{[^,]+}}
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
+
+  // CK19-USE: call void [[CALL38:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL38:@.+]]()
   #pragma omp target map(mvlaas[:])
   {
+#ifdef USE
     mvlaas[1][2][3]++;
+#endif
   }
 
   // Region 39
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE39]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE39]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S1]]
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
-  // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
-  // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
-  // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
-  // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]]
-  // CK19-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}}
-  // CK19-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
-
-  // CK19: call void [[CALL39:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
+  // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
+  // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
+  // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
+  // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]]
+  // CK19-USE-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}}
+  // CK19-USE-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]**
+  // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]]
+  // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]]
+  // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC00:%[^,]+]]
+  // CK19-NOUSE-DAG: [[SEC00]] = mul nsw i[[Z]] 0, %{{[^,]+}}
+  // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
+
+  // CK19-USE: call void [[CALL39:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL39:@.+]]()
   #pragma omp target map(mvlaas[:][:][:])
   {
+#ifdef USE
     mvlaas[1][2][3]++;
+#endif
   }
 
   // Region 40
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE40]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE40]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-DAG: store i64 {{8|4}}, i64* [[S1]]
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
-  // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
-  // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
-  // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
-  // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0
-  // CK19-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]]
-  // CK19-DAG: [[SEC222]] = mul nsw i[[Z]] 1, %{{[^,]+}}
-
-  // CK19: call void [[CALL40:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
+  // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
+  // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
+  // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]]
+  // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0
+  // CK19-USE-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]]
+  // CK19-USE-DAG: [[SEC222]] = mul nsw i[[Z]] 1, %{{[^,]+}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]**
+  // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]]
+  // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]]
+  // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[SEC00:%[^,]+]], i[[Z]] 0
+  // CK19-NOUSE-DAG: [[SEC00]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC000:%[^,]+]]
+  // CK19-NOUSE-DAG: [[SEC000]] = mul nsw i[[Z]] 1, %{{[^,]+}}
+
+  // CK19-USE: call void [[CALL40:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL40:@.+]]()
   #pragma omp target map(mvlaas[1][:ii][:])
   {
+#ifdef USE
     mvlaas[1][2][3]++;
+#endif
   }
 
   // Region 41
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE41]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE41]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   //
-  // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
-  // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
-  // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
+  // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
+  // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
   //
-  // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
-  // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
-  // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
-  // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
+  // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
+  // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
+  // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
+  // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
   //
-  // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
-  // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
-  // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
-  // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
-  // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0
-  // CK19-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]]
-  // CK19-DAG: [[SEC222]] = mul nsw i[[Z]] 0, %{{[^,]+}}
-
-  // CK19: call void [[CALL41:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
+  // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]**
+  // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]**
+  // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]]
+  // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]]
+  // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0
+  // CK19-USE-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]]
+  // CK19-USE-DAG: [[SEC222]] = mul nsw i[[Z]] 0, %{{[^,]+}}
+
+  // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]**
+  // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]**
+  // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]]
+  // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]]
+  // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[SEC00:%[^,]+]], i[[Z]] 0
+  // CK19-NOUSE-DAG: [[SEC00]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC000:%[^,]+]]
+  // CK19-NOUSE-DAG: [[SEC000]] = mul nsw i[[Z]] 0, %{{[^,]+}}
+
+  // CK19-USE: call void [[CALL41:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}})
+  // CK19-NOUSE: call void [[CALL41:@.+]]()
   #pragma omp target map(mvlaas[:1][:2][:13])
   {
+#ifdef USE
     mvlaas[1][2][3]++;
+#endif
   }
 
   // Region 42
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE42]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE42]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE42]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE42]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2698,14 +3005,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC22222]] = getelementptr {{.*}}double*** [[SEC222222:[^,]+]], i{{.+}} 0
   // CK19-DAG: [[SEC222222]] = load double***, double**** [[PTR]],
 
-  // CK19: call void [[CALL42:@.+]](double*** {{[^,]+}})
+  // CK19-USE: call void [[CALL42:@.+]](double*** {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL42:@.+]]()
   #pragma omp target map(mptras[:1][2][:13])
   {
+#ifdef USE
     mptras[1][2][3]++;
+#endif
   }
 
   // Region 43 - the memory is not contiguous for this map - will map the whole last dimension.
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE43]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE43]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -2723,14 +3033,17 @@ void explicit_maps_single (int ii){
   // CK19-DAG: [[SEC00]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 1
   // CK19-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}}
 
-  // CK19: call void [[CALL43:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-USE: call void [[CALL43:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL43:@.+]]()
   #pragma omp target map(marras[1][:ii][1:])
   {
+#ifdef USE
     marras[1][2][3]++;
+#endif
   }
 
   // Region 44
-  // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE44]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE44]]{{.+}})
+  // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE44]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE44]]{{.+}}, i8** null)
   // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2742,10 +3055,13 @@ void explicit_maps_single (int ii){
   // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]]
   // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 20
 
-  // CK19: call void [[CALL44:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-USE: call void [[CALL44:@.+]]([100 x i32]* {{[^,]+}})
+  // CK19-NOUSE: call void [[CALL44:@.+]]()
   #pragma omp target map(from:arra[20:])
   {
+#ifdef USE
     arra[50]++;
+#endif
   }
 
 }
@@ -2839,7 +3155,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f
   float *&dd = d;
 
   // Region 00
-  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2859,7 +3175,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f
   }
 
   // Region 01
-  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2880,7 +3196,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f
   }
 
   // Region 02
-  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2898,7 +3214,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f
   }
 
   // Region 03
-  // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK20-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -2926,12 +3242,26 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s --check-prefix CK21 --check-prefix CK21-64
+// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s --check-prefixes=CK21,CK21-64,CK21-USE
+// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK21,CK21-64,CK21-USE
+// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK21,CK21-32,CK21-USE
+// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK21,CK21-32,CK21-USE
+
+// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
+// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
+// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
+// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
+
+// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s --check-prefixes=CK21,CK21-64,CK21-NOUSE
 // RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefix CK21 --check-prefix CK21-64
-// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefix CK21 --check-prefix CK21-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK21,CK21-64,CK21-NOUSE
+// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK21,CK21-32,CK21-NOUSE
 // RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefix CK21 --check-prefix CK21-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  %s  --check-prefixes=CK21,CK21-32,CK21-NOUSE
 
 // RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
 // RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
@@ -2939,6 +3269,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f
 // RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
 // RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap  --check-prefix SIMD-ONLY20 %s
+
 // SIMD-ONLY20-NOT: {{__kmpc|__tgt}}
 #ifdef CK21
 // CK21: [[ST:%.+]] = type { i32, i32, float* }
@@ -2977,7 +3308,7 @@ struct CC {
     T *lb;
 
     // Region 00
-    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
     // CK21-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3002,14 +3333,17 @@ struct CC {
     // CK21-DAG: store i64 {{.+}}, i64* [[S1]]
     // CK21-DAG: [[SEC1]] = getelementptr {{.*}}[[ST]]* [[VAR1:%.+]], i{{.+}} 0, i{{.+}} 0
 
-    // CK21: call void [[CALL00:@.+]]([[ST]]* {{[^,]+}})
+    // CK21-USE: call void [[CALL00:@.+]]([[ST]]* {{[^,]+}})
+    // CK21-NOUSE: call void [[CALL00:@.+]]()
     #pragma omp target map(A)
     {
+#ifdef USE
       A += 1;
+#endif
     }
 
     // Region 01
-    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3023,14 +3357,17 @@ struct CC {
     // CK21-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 0
     // CK21-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]]
 
-    // CK21: call void [[CALL01:@.+]](i32* {{[^,]+}})
+    // CK21-USE: call void [[CALL01:@.+]](i32* {{[^,]+}})
+    // CK21-NOUSE: call void [[CALL01:@.+]]()
     #pragma omp target map(lb[:X])
     {
+#ifdef USE
       lb[4] += 1;
+#endif
     }
 
     // Region 02
-    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
     // CK21-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3057,14 +3394,17 @@ struct CC {
     // CK21-DAG: [[RVAR1]] = load float*, float** [[SEC1_:%[^,]+]]
     // CK21-DAG: [[SEC1_]] = getelementptr {{.*}}[[ST]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2
 
-    // CK21: call void [[CALL02:@.+]]([[ST]]* {{[^,]+}})
+    // CK21-USE: call void [[CALL02:@.+]]([[ST]]* {{[^,]+}})
+    // CK21-NOUSE: call void [[CALL02:@.+]]()
     #pragma omp target map(from:B[X:X+2])
     {
+#ifdef USE
       B[2] += 1.0f;
+#endif
     }
 
     // Region 03
-    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3075,14 +3415,17 @@ struct CC {
     // CK21-DAG: store [123 x float]* [[VAR0:%.+]], [123 x float]** [[CBP0]]
     // CK21-DAG: store [123 x float]* [[VAR0]], [123 x float]** [[CP0]]
 
-    // CK21: call void [[CALL03:@.+]]([123 x float]* {{[^,]+}})
+    // CK21-USE: call void [[CALL03:@.+]]([123 x float]* {{[^,]+}})
+    // CK21-NOUSE: call void [[CALL03:@.+]]()
     #pragma omp target map(from:la)
     {
+#ifdef USE
       la[3] += 1.0f;
+#endif
     }
 
     // Region 04
-    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3093,15 +3436,18 @@ struct CC {
     // CK21-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]]
     // CK21-DAG: store i32* [[VAR0]], i32** [[CP0]]
 
-    // CK21: call void [[CALL04:@.+]](i32* {{[^,]+}})
+    // CK21-USE: call void [[CALL04:@.+]](i32* {{[^,]+}})
+    // CK21-NOUSE: call void [[CALL04:@.+]]()
     #pragma omp target map(from:arg)
     {
+#ifdef USE
       arg +=1;
+#endif
     }
 
     // Make sure the extra flag is passed to the second map.
     // Region 05
-    // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE05]]{{.+}})
+    // CK21-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
     // CK21-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3135,11 +3481,14 @@ struct CC {
     // CK21-DAG: store i64 {{.+}}, i64* [[S2]]
     // CK21-DAG: [[SEC2]] = getelementptr {{.*}}[[ST]]* [[VAR2]], i{{.+}} 0, i{{.+}} 1
 
-    // CK21: call void [[CALL05:@.+]]([[ST]]* {{[^,]+}})
+    // CK21-USE: call void [[CALL05:@.+]]([[ST]]* {{[^,]+}})
+    // CK21-NOUSE: call void [[CALL05:@.+]]()
     #pragma omp target map(A, A2)
     {
+#ifdef USE
       A += 1;
       A2 += 1;
+#endif
     }
     return A;
   }
@@ -3261,7 +3610,7 @@ STT *std;
 // CK22-LABEL: explicit_maps_globals{{.*}}(
 int explicit_maps_globals(void){
   // Region 00
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3277,7 +3626,7 @@ int explicit_maps_globals(void){
   { a+=1; }
 
   // Region 01
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3293,7 +3642,7 @@ int explicit_maps_globals(void){
   { c[3]+=1; }
 
   // Region 02
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3309,7 +3658,7 @@ int explicit_maps_globals(void){
   { d[3]+=1; }
 
   // Region 03
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3325,7 +3674,7 @@ int explicit_maps_globals(void){
   { c[3]+=1; }
 
   // Region 04
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3344,7 +3693,7 @@ int explicit_maps_globals(void){
   { d[3]+=1; }
 
   // Region 05
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3360,7 +3709,7 @@ int explicit_maps_globals(void){
   { sa.fa+=1; }
 
   // Region 06
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3376,7 +3725,7 @@ int explicit_maps_globals(void){
   { sc[3].fa+=1; }
 
   // Region 07
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3392,7 +3741,7 @@ int explicit_maps_globals(void){
   { sd[3].fa+=1; }
 
   // Region 08
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3408,7 +3757,7 @@ int explicit_maps_globals(void){
   { sc[3].fa+=1; }
 
   // Region 09
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3427,7 +3776,7 @@ int explicit_maps_globals(void){
   { sd[3].fa+=1; }
 
   // Region 10
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3443,7 +3792,7 @@ int explicit_maps_globals(void){
   { sta.fa+=1; }
 
   // Region 11
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3459,7 +3808,7 @@ int explicit_maps_globals(void){
   { stc[3].fa+=1; }
 
   // Region 12
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3475,7 +3824,7 @@ int explicit_maps_globals(void){
   { std[3].fa+=1; }
 
   // Region 13
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3491,7 +3840,7 @@ int explicit_maps_globals(void){
   { stc[3].fa+=1; }
 
   // Region 14
-  // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}})
+  // CK22-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}, i8** null)
   // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3578,7 +3927,7 @@ int explicit_maps_inside_captured(int a){
   // CK23: define {{.*}}explicit_maps_inside_captured{{.*}}
   [&](void){
     // Region 00
-    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3597,7 +3946,7 @@ int explicit_maps_inside_captured(int a){
     #pragma omp target map(a)
       { a+=1; }
     // Region 01
-    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3616,7 +3965,7 @@ int explicit_maps_inside_captured(int a){
     #pragma omp target map(b)
       { b+=1; }
     // Region 02
-    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3636,7 +3985,7 @@ int explicit_maps_inside_captured(int a){
       { c[3]+=1; }
 
     // Region 03
-    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3655,7 +4004,7 @@ int explicit_maps_inside_captured(int a){
     #pragma omp target map(d)
       { d[3]+=1; }
     // Region 04
-    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3676,7 +4025,7 @@ int explicit_maps_inside_captured(int a){
       { c[3]+=1; }
 
     // Region 05
-    // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+    // CK23-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
     // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -3793,7 +4142,7 @@ int explicit_maps_struct_fields(int a){
   SC *p;
 
 // Region 01
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3825,7 +4174,7 @@ int explicit_maps_struct_fields(int a){
 // Same thing but starting from a pointer.
 //
 // Region 13
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE13]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE13]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3857,7 +4206,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 14
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE14]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE14]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3890,7 +4239,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 15
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE15]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE15]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3924,7 +4273,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 16
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3957,7 +4306,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 17
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -3993,7 +4342,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 18
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4028,7 +4377,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 19
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE19]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE19]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4075,7 +4424,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 20
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4109,7 +4458,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 21
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE21]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE21]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4156,7 +4505,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 22
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4191,7 +4540,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 23
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE23]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE23]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4239,7 +4588,7 @@ int explicit_maps_struct_fields(int a){
   { p->a++; }
 
 // Region 24
-// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE24]]{{.+}})
+// CK24-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE24]]{{.+}}, i8** null)
 // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK24-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4359,7 +4708,7 @@ struct CC {
 
   int foo(T arg) {
     // Region 00
-    // CK25-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK25-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
     // CK25-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK25-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
     // CK25-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -4392,7 +4741,7 @@ struct CC {
     }
 
     // Region 01
-    // CK25-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK25-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
     // CK25-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK25-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4487,7 +4836,7 @@ struct CC {
     #pragma omp parallel firstprivate(fA,fB) private(pA,pB)
     {
       // Region 00
-      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4514,7 +4863,7 @@ struct CC {
       }
 
       // Region 01
-      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4541,7 +4890,7 @@ struct CC {
       }
 
       // Region 02
-      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4568,7 +4917,7 @@ struct CC {
       }
 
       // Region 01
-      // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE03]]{{.+}})
+      // CK26-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
       // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
       // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4697,7 +5046,7 @@ void zero_size_section_and_private_maps (int ii){
   int *pa;
 
   // Region 00
-  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4715,7 +5064,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 01
-  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4736,7 +5085,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 02
-  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4757,7 +5106,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 03
-  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4782,7 +5131,7 @@ void zero_size_section_and_private_maps (int ii){
   int pvtArr[10];
 
   // Region 04
-  // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null)
+  // CK27: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null)
   // CK27: call void [[CALL04:@.+]]()
   #pragma omp target private(pvtPtr)
   {
@@ -4790,7 +5139,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 05
-  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}, i8** null)
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4808,7 +5157,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 06
-  // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null)
+  // CK27: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null)
   // CK27: call void [[CALL06:@.+]]()
   #pragma omp target private(pvtScl)
   {
@@ -4816,7 +5165,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 07
-  // CK27-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZE07]]{{.+}}, {{.+}}[[MTYPE07]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZE07]]{{.+}}, {{.+}}[[MTYPE07]]{{.+}}, i8** null)
   // CK27-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
   // CK27-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
   // CK27-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
@@ -4836,7 +5185,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 08
-  // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null)
+  // CK27: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null)
   // CK27: call void [[CALL08:@.+]]()
   #pragma omp target private(pvtArr)
   {
@@ -4844,7 +5193,7 @@ void zero_size_section_and_private_maps (int ii){
   }
 
   // Region 09
-  // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}})
+  // CK27-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}, i8** null)
   // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4901,7 +5250,7 @@ void explicit_maps_pointer_references (int *p){
   int *&a = p;
 
   // Region 00
-  // CK28-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK28-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK28-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK28-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4921,7 +5270,7 @@ void explicit_maps_pointer_references (int *p){
   }
 
   // Region 01
-  // CK28-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK28-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK28-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK28-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -4988,7 +5337,7 @@ struct SSB{
   void foo() {
 
     // Region 00
-    // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK29-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 
     // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -5034,7 +5383,7 @@ struct SSB{
     }
 
     // Region 01
-    // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE01]]{{.+}})
+    // CK29-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
 
     // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -5079,7 +5428,7 @@ struct SSB{
     }
 
     // Region 02
-    // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE02]]{{.+}})
+    // CK29-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
 
     // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -5172,7 +5521,7 @@ typedef struct StructWithPtrTag : public Base {
   int *ptr1;
 } StructWithPtr;
 
-// CK30-DAG: call i32 @__tgt_target(i64 -1, i8* @.__omp_offloading_{{.*}}map_with_deep_copy{{.*}}_l{{[0-9]+}}.region_id, i32 6, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MTYPE00]], i32 0, i32 0))
+// CK30-DAG: call i32 @__tgt_target_mapper(i64 -1, i8* @.__omp_offloading_{{.*}}map_with_deep_copy{{.*}}_l{{[0-9]+}}.region_id, i32 6, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MTYPE00]], i32 0, i32 0), i8** null)
 // CK30-DAG: [[GEPS]] = getelementptr inbounds [6 x i{{64|32}}], [6 x i64]* [[SIZES:%.+]], i32 0, i32 0
 // CK30-DAG: [[GEPP]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[PTRS:%.+]], i32 0, i32 0
 // CK30-DAG: [[GEPBP]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BASES:%.+]], i32 0, i32 0
@@ -5314,7 +5663,7 @@ void explicit_maps_single (int ii){
 
   // Close.
   // Region 00
-  // CK31-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK31-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK31-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK31-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -5333,7 +5682,7 @@ void explicit_maps_single (int ii){
 
   // Always Close.
   // Region 01
-  // CK31-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}})
+  // CK31-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}, i8** null)
   // CK31-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK31-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -5376,7 +5725,7 @@ void explicit_maps_single (int ii){
 
 void array_shaping(float *f, int sa) {
 
-  // CK32-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}})
+  // CK32-DAG: call i32 @__tgt_target_mapper(i64 -1, i8* @{{.+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}}, i8** null)
   // CK32-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK32-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK32-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -5402,7 +5751,7 @@ void array_shaping(float *f, int sa) {
   #pragma omp target map(to:([3][sa][4])f)
   f[0] = 1;
   sa = 1;
-  // CK32-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}})
+  // CK32-DAG: call i32 @__tgt_target_mapper(i64 -1, i8* @{{.+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}, i8** null)
   // CK32-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK32-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK32-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
diff --git a/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
index ccdd092350973..cc3df28b9839a 100644
--- a/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
+++ b/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
@@ -60,7 +60,7 @@ struct maptest {
     // CHECK: [[BPTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPTRS]], i32 0, i32 0
     // CHECK: [[PTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i32 0, i32 0
     // CHECK: [[SIZE:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZES]], i32 0, i32 0
-    // CHECK: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAP_ENTER]], i32 0, i32 0))
+    // CHECK: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAP_ENTER]], i32 0, i32 0), i8** null)
 #pragma omp target enter data map(alloc : s.data[:6])
   }
 
@@ -104,7 +104,7 @@ struct maptest {
     // CHECK: [[BPTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPTRS]], i32 0, i32 0
     // CHECK: [[PTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i32 0, i32 0
     // CHECK: [[SIZE:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZES]], i32 0, i32 0
-    // CHECK: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAP_EXIT]], i32 0, i32 0))
+    // CHECK: call void @__tgt_target_data_end_mapper(i64 -1, i32 2, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAP_EXIT]], i32 0, i32 0), i8** null)
 #pragma omp target exit data map(delete : s.data[:6])
   }
 };
diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp
index 2e094c294dfa0..b8a002f34789a 100644
--- a/clang/test/OpenMP/target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_codegen.cpp
@@ -98,7 +98,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 0)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -115,7 +115,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -140,7 +140,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -196,7 +196,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -540,7 +540,7 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -607,7 +607,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -657,7 +657,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_parallel_default_messages.cpp b/clang/test/OpenMP/target_parallel_default_messages.cpp
index 0691cdf37e4eb..c8f68659438fe 100644
--- a/clang/test/OpenMP/target_parallel_default_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_default_messages.cpp
@@ -2,20 +2,29 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target parallel default // expected-error {{expected '(' after 'default'}}
   foo();
-  #pragma omp target parallel default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target parallel default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
-  #pragma omp target parallel default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
   #pragma omp target parallel default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
   #pragma omp target parallel default (shared), default(shared) // expected-error {{directive '#pragma omp target parallel' cannot contain more than one 'default' clause}}
   foo();
-  #pragma omp target parallel default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
   #pragma omp target parallel default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -28,5 +37,14 @@ int main(int argc, char **argv) {
   #pragma omp target parallel default(none) // expected-note {{explicit data sharing attribute requested here}}
   #pragma omp parallel default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifndef OMP51
+#pragma omp target parallel default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_parallel_depend_codegen.cpp b/clang/test/OpenMP/target_parallel_depend_codegen.cpp
index 71d02ec19b4be..7d2dc6fe20e27 100644
--- a/clang/test/OpenMP/target_parallel_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 1, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp
index e8590530a0d89..bc78515edd4c8 100644
--- a/clang/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp
@@ -102,7 +102,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 0)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -122,7 +122,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
@@ -159,7 +159,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -217,7 +217,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [10 x i64], [10 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -560,7 +560,7 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -635,7 +635,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -685,7 +685,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_parallel_for_default_messages.cpp b/clang/test/OpenMP/target_parallel_for_default_messages.cpp
index fc6ba43138d76..4a3aae68e0865 100644
--- a/clang/test/OpenMP/target_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp target parallel for default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target parallel for default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp target parallel for default(shared), default(shared) // expected-error {{directive '#pragma omp target parallel for' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifndef OMP51
+#pragma omp target parallel for default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (i = 0; i < argc; ++i) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_parallel_for_depend_codegen.cpp b/clang/test/OpenMP/target_parallel_for_depend_codegen.cpp
index 04680950a1596..fcb06bb83a255 100644
--- a/clang/test/OpenMP/target_parallel_for_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 1, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
index 055d5dce28bbd..5bb93b222b78c 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
@@ -130,7 +130,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 0)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -148,7 +148,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
@@ -185,7 +185,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -243,7 +243,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [10 x i64], [10 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -600,8 +600,8 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// OMP45-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
-// OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 7, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 %{{.+}})
+// OMP45-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 1, i32 0)
+// OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 7, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 1, i32 %{{.+}})
 // OMP45-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -707,7 +707,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -757,7 +757,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp
index daa93b9c9050b..48489309ef037 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp target parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp target parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp target parallel for simd' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'i' must have explicitly specified data sharing attributes}} expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifndef OMP51
+#pragma omp target parallel for simd default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < argc; i++) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
index 66a065fb98ac8..a26033840c4fb 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[IN]], i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 1, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
index e7c3abee6be47..4c08742a2fba7 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
index f04b8108cd0ea..67ca60e51c2cf 100644
--- a/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp
index b315362735fec..69e7f77231445 100644
--- a/clang/test/OpenMP/target_parallel_if_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp
@@ -151,7 +151,7 @@ int bar(int n){
 // CHECK:       store i8 [[FB]], i8* [[CONV]], align
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i32 1, i32 [[NT:%.+]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i8** null, i32 1, i32 [[NT:%.+]])
 // CHECK-DAG:   [[NT]] = select i1 %{{.+}}, i32 0, i32 1
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
@@ -178,7 +178,7 @@ int bar(int n){
 // CHECK:       br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[NT:%.+]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i8** null, i32 1, i32 [[NT:%.+]])
 // CHECK-DAG:   [[NT]] = select i1 %{{.+}}, i32 0, i32 1
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
@@ -211,7 +211,7 @@ int bar(int n){
 // CHECK:       br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[NT:%.+]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 1, i32 [[NT:%.+]])
 // CHECK-DAG:   [[NT]] = select i1 %{{.+}}, i32 0, i32 1
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
@@ -233,7 +233,7 @@ int bar(int n){
 // CHECK:       br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i8** null, i32 1, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -254,7 +254,7 @@ int bar(int n){
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 1)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 1, i32 1)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -266,7 +266,7 @@ int bar(int n){
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i8** null, i32 1, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
index f12248d6458ca..49e1f6af68b40 100644
--- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
@@ -153,7 +153,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i8** null, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -164,7 +164,7 @@ int bar(int n){
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.+}}, i32 1, i32 1024)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, {{.+}}, i8** null, i32 1, i32 1024)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -192,7 +192,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -213,7 +213,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -231,7 +231,7 @@ int bar(int n){
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i8** null, i32 1, i32 20)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -253,7 +253,7 @@ int bar(int n){
 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
 // CHECK:       [[THREADS:%.+]] = zext i16 [[T]] to i32
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i8** null, i32 1, i32 [[THREADS]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
index d98f76261caf5..155d2b081f479 100644
--- a/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 1, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_simd_codegen.cpp b/clang/test/OpenMP/target_simd_codegen.cpp
index 597cff7815a3b..5295312c7dd8a 100644
--- a/clang/test/OpenMP/target_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_simd_codegen.cpp
@@ -127,7 +127,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 1)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 1)
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
@@ -145,7 +145,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 1)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 1, i32 1)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
@@ -182,7 +182,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 1)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 1, i32 1)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -238,7 +238,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 1)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 1, i32 1)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -523,8 +523,8 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// OMP45-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
-// OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x  i64], [6 x  i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1)
+// OMP45-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 1, i32 1)
+// OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x  i64], [6 x  i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 1, i32 1)
 // OMP45-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[SR]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S:%.+]], i32 0, i32 0
@@ -618,7 +618,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 1)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 1, i32 1)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -668,7 +668,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 1)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 1, i32 1)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_simd_depend_codegen.cpp b/clang/test/OpenMP/target_simd_depend_codegen.cpp
index 72cd550207b67..001068d4c2c95 100644
--- a/clang/test/OpenMP/target_simd_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_simd_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 1, i32 1)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 1)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 1)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 1, i32 1)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
index eaade4b9b5cd6..16800694d94e9 100644
--- a/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 1)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 1, i32 1)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp
index 9011c3c0ff805..063003fbcc541 100644
--- a/clang/test/OpenMP/target_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_codegen.cpp
@@ -107,7 +107,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i32 {{[^,]+}}, i32 {{[^)]+}})
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -144,7 +144,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 0, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -168,7 +168,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -223,7 +223,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -582,7 +582,7 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -654,7 +654,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -704,7 +704,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_teams_default_messages.cpp b/clang/test/OpenMP/target_teams_default_messages.cpp
index 21fa8270ef6a7..85c417f8f9853 100644
--- a/clang/test/OpenMP/target_teams_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_default_messages.cpp
@@ -2,20 +2,29 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp target teams default // expected-error {{expected '(' after 'default'}}
   foo();
-#pragma omp target teams default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
-#pragma omp target teams default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 #pragma omp target teams default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
 #pragma omp target teams default (shared), default(shared) // expected-error {{directive '#pragma omp target teams' cannot contain more than one 'default' clause}}
   foo();
-#pragma omp target teams default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
 #pragma omp target teams default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -24,5 +33,14 @@ int main(int argc, char **argv) {
 #pragma omp target teams default(none) // expected-note {{explicit data sharing attribute requested here}}
 #pragma omp parallel default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifndef OMP51
+#pragma omp target teams default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_depend_codegen.cpp b/clang/test/OpenMP/target_teams_depend_codegen.cpp
index 9a58e40de7505..1327b5fc827b7 100644
--- a/clang/test/OpenMP/target_teams_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 0, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp
index 547e45f6d3e7e..3271a0bab8066 100644
--- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp
@@ -103,7 +103,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i32 {{[^,]+}}, i32 {{[^)]+}})
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -140,7 +140,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 0, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -164,7 +164,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -220,7 +220,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 0, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [10 x i64], [10 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -569,7 +569,7 @@ int bar(int n){
 // CHECK-32:    [[CSSZSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSSZSIZE]] to i64
 
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -644,7 +644,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -701,7 +701,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
index 62fc58980a7c6..39d843c5a9806 100644
--- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute collapse(2)
     for(int i = 0; i < X; i++) {
@@ -104,7 +104,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -120,7 +120,7 @@ int main (int argc, char **argv) {
 // CK2: call void @__kmpc_for_static_fini(
 // CK2: ret void
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_default_messages.cpp
index fd834e7cba32c..a490ad61385ff 100644
--- a/clang/test/OpenMP/target_teams_distribute_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_default_messages.cpp
@@ -2,24 +2,41 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -DOMP51 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -DOMP51 %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target teams distribute default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
-  #pragma omp target teams distribute default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams distribute default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
-  #pragma omp target teams distribute default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target teams distribute default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target teams distribute default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
-  #pragma omp target teams distribute default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target teams distribute default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifndef OMP51
+#pragma omp target teams distribute default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < 200; i++) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_distribute_depend_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_depend_codegen.cpp
index ba846605d54e8..de0d81aa69dfc 100644
--- a/clang/test/OpenMP/target_teams_distribute_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 0, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
index 3eb153173006a..1c8346655d6d8 100644
--- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
@@ -26,19 +26,19 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target teams distribute dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target teams distribute dist_schedule(static, X/2)
     for(int i = 0; i < X; i++) {
@@ -136,11 +136,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -173,11 +173,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
index c268b72a25edd..3609f547bac1e 100644
--- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target teams distribute firstprivate(g, g1, sivar)
@@ -164,7 +164,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -258,7 +258,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
index 459384f464fdc..71f4a01344816 100644
--- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
@@ -68,7 +68,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -203,7 +203,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -303,7 +303,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 14e734f2b30c1..19d095f4d0a3d 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -61,7 +61,7 @@ int target_teams_fun(int *g){
   // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
   // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
-  // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}},
+  // HCK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}},
 
   // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
   #pragma omp target teams distribute parallel for num_teams(te), thread_limit(th)
@@ -70,7 +70,7 @@ int target_teams_fun(int *g){
     #pragma omp cancel for
   }
 
-  // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0),
+  // HCK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null,
   // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   {{{
   #pragma omp target teams distribute parallel for is_device_ptr(g)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
index 80e38925b39ec..dd47fa349eb48 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute parallel for collapse(2)
     for(int i = 0; i < X; i++) {
@@ -106,7 +106,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -130,7 +130,7 @@ int main (int argc, char **argv) {
 
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
index 00e0704a6ccac..2fe7931369618 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
@@ -2,24 +2,41 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp target teams distribute parallel for default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
- #pragma omp target teams distribute parallel for default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
-#pragma omp target teams distribute parallel for default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 #pragma omp target teams distribute parallel for default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
 #pragma omp target teams distribute parallel for default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute parallel for' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
-#pragma omp target teams distribute parallel for default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
 #pragma omp target teams distribute parallel for default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifndef OMP51
+#pragma omp target teams distribute parallel for default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < 200; i++) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp
index af6637f2e0dcc..02fbb9cc2ec3a 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 0, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
index 28619f883f9dd..d1c473b4b1926 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -26,19 +26,19 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute parallel for
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target teams distribute parallel for dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target teams distribute parallel for dist_schedule(static, X/2)
     for(int i = 0; i < X; i++) {
@@ -157,11 +157,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -212,11 +212,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
index 9c91538ce7e0f..f2c066eb34a25 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -109,7 +109,7 @@ int main() {
   // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // HLAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // HLAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // HLAMBDA:  ret
 #pragma omp target teams distribute parallel for firstprivate(g, g1, sivar)
@@ -213,7 +213,7 @@ int main() {
 }
 
 // HCHECK: define {{.*}}i{{[0-9]+}} @main()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, 
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5,
 // HCHECK: call void @[[OFFL1:.+]](
 // HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // HCHECK:  ret
@@ -374,7 +374,7 @@ int main() {
 // CHECK: ret void
 
 // HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // HCHECK: call void @[[TOFFL1:.+]](
 // HCHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index 8f1e88f980cdb..94b37b5d94fb2 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -23,10 +23,10 @@ int Arg;
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp target teams distribute parallel for
   for(int i = 0 ; i < 100; i++) {}
@@ -81,12 +81,12 @@ int tmain(T Arg) {
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
 // CHECK-NOT: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
 #pragma omp target teams distribute parallel for if (true)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
index c0f9ecaea20fb..71b42e785d32b 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -68,7 +68,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -219,7 +219,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](
 // CHECK: ret
 
@@ -344,7 +344,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
index 201c19e2ed5b7..73c6f16fa3e32 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
@@ -15,7 +15,7 @@
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: %0 = call i32 @__tgt_target_teams(i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
+// CHECK: %0 = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK: call void [[TARGET_OUTLINE:@.+]]()
 // CHECK: ret void
 #pragma omp target teams distribute parallel for order(concurrent)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
index 87a1499689304..1bfd8953298ad 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
@@ -108,7 +108,7 @@ int main() {
   // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+    // HLAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
     // HLAMBDA: call void @[[LOFFL1:.+]](
     // HLAMBDA:  ret
 #pragma omp target teams distribute parallel for private(g, g1, sivar)
@@ -199,7 +199,7 @@ int main() {
 }
 
 // HCHECK: define {{.*}}i{{[0-9]+}} @main()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i32 0, i32 0) 
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i8** null, i32 0, i32 0)
 // HCHECK: call void @[[OFFL1:.+]]()
 // HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // HCHECK:  ret
@@ -277,7 +277,7 @@ int main() {
 // CHECK: ret void
 
 // HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // HCHECK: call void @[[TOFFL1:.+]]()
 // HCHECK: ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
index a7242c9112451..11cfe8ddf151a 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -44,9 +44,9 @@ int main() {
   return tmain();
 }
 
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL2:@.+]]()
 // CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
 // CHECK: ret i32 [[CALL_RET]]
@@ -76,7 +76,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL3:@.+]]()
 
 // CHECK: define{{.+}} [[OFFL3]]()
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
index 6575659637a52..055e0d35d0789 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
@@ -45,7 +45,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target teams distribute parallel for reduction(+: sivar)
@@ -169,7 +169,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}}* @{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -261,7 +261,7 @@ int main() {
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
index 3ec1782f2c09d..979993f841da9 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
@@ -34,33 +34,33 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute parallel for
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target teams distribute parallel for schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target teams distribute parallel for schedule(static, X/2)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL4:.+]](
     #pragma omp target teams distribute parallel for schedule(dynamic)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL5:.+]](
     #pragma omp target teams distribute parallel for schedule(dynamic, X/2)
     for(int i = 0; i < X; i++) {
@@ -234,15 +234,15 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL5:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -326,15 +326,15 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT5:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index 2fc166ed0b873..d2031d6d214b1 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -61,7 +61,7 @@ int target_teams_fun(int *g){
 // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
 // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
-// HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
+// HCK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
 
 // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
   int i;
@@ -70,7 +70,7 @@ int target_teams_fun(int *g){
     a[i] = 0;
   }
 
-  // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0),
+  // HCK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null,
   // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   {{{
   #pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
index 3aca29165cd89..72db05c031ec8 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute parallel for simd collapse(2)
     for(int i = 0; i < X; i++) {
@@ -106,7 +106,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -130,7 +130,7 @@ int main (int argc, char **argv) {
 
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
index 7c46c964d2ec3..e5ff856222501 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
@@ -2,16 +2,25 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp target teams distribute parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
 
-#pragma omp target teams distribute parallel for simd default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
 
-#pragma omp target teams distribute parallel for simd default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
 #pragma omp target teams distribute parallel for simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -20,11 +29,19 @@ int main(int argc, char **argv) {
 #pragma omp target teams distribute parallel for simd default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
 
-#pragma omp target teams distribute parallel for simd default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
 #pragma omp target teams distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifndef OMP51
+#pragma omp target teams distribute parallel for simd default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < argc; ++i) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
index 1d223e675e8ed..47de671de4aec 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
@@ -122,7 +122,7 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CHECK:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -182,7 +182,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -199,7 +199,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 0, i32 0)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 281a8d7425343..8cad1f98ff089 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -26,19 +26,19 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute parallel for simd
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target teams distribute parallel for simd dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target teams distribute parallel for simd dist_schedule(static, X/2)
     for(int i = 0; i < X; i++) {
@@ -157,11 +157,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -212,11 +212,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index 978e90dec5432..6d5aac81e04cc 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -109,7 +109,7 @@ int main() {
   // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // HLAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // HLAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // HLAMBDA:  ret
 #pragma omp target teams distribute parallel for simd firstprivate(g, g1, sivar)
@@ -213,7 +213,7 @@ int main() {
 }
 
 // HCHECK: define {{.*}}i{{[0-9]+}} @main()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, 
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5,
 // HCHECK: call void @[[OFFL1:.+]](
 // HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // HCHECK:  ret
@@ -374,7 +374,7 @@ int main() {
 // CHECK: ret void
 
 // HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // HCHECK: call void @[[TOFFL1:.+]](
 // HCHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index dda468e604eba..50de0a5e1dff3 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -29,10 +29,10 @@ int Arg;
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #ifdef OMP5
 #pragma omp target teams distribute parallel for simd if(simd: true) nontemporal(Arg)
@@ -96,12 +96,12 @@ int tmain(T Arg) {
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
 // CHECK-NOT: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 // CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
 #pragma omp target teams distribute parallel for simd if (true)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index ca09990fbaf62..226827335060a 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -68,7 +68,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -221,7 +221,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](
 // CHECK: ret
 
@@ -346,7 +346,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
index 88169197d1c61..72a5d8d76c844 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -108,7 +108,7 @@ int main() {
   // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+    // HLAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
     // HLAMBDA: call void @[[LOFFL1:.+]](
     // HLAMBDA:  ret
 #pragma omp target teams distribute parallel for simd private(g, g1, sivar)
@@ -199,7 +199,7 @@ int main() {
 }
 
 // HCHECK: define {{.*}}i{{[0-9]+}} @main()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i32 0, i32 0)
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i8** null, i32 0, i32 0)
 // HCHECK: call void @[[OFFL1:.+]]()
 // HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // HCHECK:  ret
@@ -277,7 +277,7 @@ int main() {
 // CHECK: ret void
 
 // HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// HCHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // HCHECK: call void @[[TOFFL1:.+]]()
 // HCHECK: ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
index 9d10c2e3dc7c4..f3f4a7bc9f8ab 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -44,9 +44,9 @@ int main() {
   return tmain();
 }
 
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL2:@.+]]()
 // CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
 // CHECK: ret i32 [[CALL_RET]]
@@ -76,7 +76,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL3:@.+]]()
 
 // CHECK: define{{.+}} [[OFFL3]]()
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
index e938a7746313d..bfe26c6d7f0e7 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -45,7 +45,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target teams distribute parallel for simd reduction(+: sivar)
@@ -169,7 +169,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}}* @{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -261,7 +261,7 @@ int main() {
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
index 9852a75187481..a1525c5a44a0f 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -34,33 +34,33 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute parallel for simd
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target teams distribute parallel for simd schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target teams distribute parallel for simd schedule(static, X/2)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL4:.+]](
     #pragma omp target teams distribute parallel for simd schedule(dynamic)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL5:.+]](
     #pragma omp target teams distribute parallel for simd schedule(dynamic, X/2)
     for(int i = 0; i < X; i++) {
@@ -234,15 +234,15 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL5:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -326,15 +326,15 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT5:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
index 30bcdcfa68c5f..6e7d6a18ff69b 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
index f352b2e1bc51e..15b7e7f6c93dc 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
index f52d3b040f04b..e1cd6c40afa4d 100644
--- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
@@ -84,7 +84,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]]()
     // LAMBDA:  ret
 #pragma omp target teams distribute private(g, g1, sivar)
@@ -151,7 +151,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -196,7 +196,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
index 704bf00120168..ef01ee91b3b94 100644
--- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
@@ -46,7 +46,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target teams distribute reduction(+: sivar)
@@ -124,7 +124,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i32* {{.+}})
 // CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]()
 // CHECK: ret i32 [[RES]]
@@ -174,7 +174,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
index 4912352e17ca2..fd070a548fdfd 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
@@ -132,7 +132,7 @@ int foo(int n) {
   double cn[5][n];
   TT d;
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i32 {{[^,]+}}, i32 {{[^)]+}})
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -173,7 +173,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 1)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 0, i32 1)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -197,7 +197,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 1)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i8** null, i32 0, i32 1)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -252,7 +252,7 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 1)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i32 0, i32 1)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
@@ -595,7 +595,7 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// OMP45-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 1)
+// OMP45-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 0, i32 1)
 // OMP45-DAG:   [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[SR]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S:%.+]], i32 0, i32 0
@@ -617,7 +617,7 @@ int bar(int n){
 // OMP45-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [6 x i64], [6 x i64]* [[S]], i32 [[IDX5:[0-9]+]]
 // OMP45-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP]], i32 [[IDX5]]
 // OMP45-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P]], i32 [[IDX5]]
-// OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 7, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 1)
+// OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 7, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* [[MAPT5]], i32 0, i32 0), i8** null, i32 0, i32 1)
 // OMP50-DAG:   [[BPR]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[BP:%.+]], i32 0, i32 0
 // OMP50-DAG:   [[PR]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[P:%.+]], i32 0, i32 0
 // OMP50-DAG:   [[SR]] = getelementptr inbounds [7 x i64], [7 x i64]* [[S:%.+]], i32 0, i32 0
@@ -702,7 +702,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 1)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -759,7 +759,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 1)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
index 8a80774ef187d..95e0fcdf64db2 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute simd collapse(2)
     for(int i = 0; i < X; i++) {
@@ -104,7 +104,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -120,7 +120,7 @@ int main (int argc, char **argv) {
 // CK2: call void @__kmpc_for_static_fini(
 // CK2: ret void
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp
index 4e8ff2f70234a..d7c1f15c88983 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp
@@ -156,8 +156,8 @@ int foo(int n) {
   // CHECK:       [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]],
   // CHECK:       [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
 
-  // OMP45:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
-  // OMP50:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{28|128|76}}, i[[SZ]] {{16|12|24}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // OMP45:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // OMP50:       [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* [[ID]], i32 [[GTID]], i32 1, i[[SZ]] {{28|152|88}}, i[[SZ]] {{16|12|24}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CHECK:       [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]*
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0
   // CHECK:       getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1
@@ -221,7 +221,7 @@ int foo(int n) {
 // CHECK:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 1)
+// CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 0, i32 1)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
 // CHECK:       [[FAIL]]
@@ -241,8 +241,8 @@ int foo(int n) {
 // OMP50-32:       [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 3
 // CHECK:       [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]],
 // CHECK:       [[DEVICE:%.+]] = sext i32 [[DEV]] to i64
-// OMP45:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 1)
-// OMP50:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 1)
+// OMP45:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 0, i32 1)
+// OMP50:       [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]], i32 0, i32 1)
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
index 874d4fc92c667..c845d927c4229 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
@@ -26,19 +26,19 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target teams distribute simd
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target teams distribute simd dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target teams distribute simd dist_schedule(static, X/2)
     for(int i = 0; i < X; i++) {
@@ -136,11 +136,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -173,11 +173,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
index 05ea0e897a4b5..4d3a87a7d18f5 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target teams distribute simd firstprivate(g, g1, sivar)
@@ -164,7 +164,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -258,7 +258,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
index 6fae7829874f9..55d969186fd11 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
@@ -68,7 +68,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -205,7 +205,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -306,7 +306,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
index 402c83921d8f6..82449397d553f 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
@@ -84,7 +84,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]]()
     // LAMBDA:  ret
 #pragma omp target teams distribute simd private(g, g1, sivar)
@@ -151,7 +151,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -196,7 +196,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
index a850238af5fe5..ec0142162e835 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
@@ -46,7 +46,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target teams distribute simd reduction(+: sivar)
@@ -124,7 +124,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]](i32* {{.+}})
 // CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]()
 // CHECK: ret i32 [[RES]]
@@ -174,7 +174,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
index 6091ae2716b1d..5ece55242833f 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 1)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
index 376d51ab00676..a0fff790ee8cc 100644
--- a/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp
index 00d2839781075..328e45a652a91 100644
--- a/clang/test/OpenMP/target_teams_map_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_map_codegen.cpp
@@ -20,15 +20,16 @@
 #ifndef HEADER
 #define HEADER
 
+// HOST: @[[MAPTYPES_PRIVATE:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 // HOST: @[[MAPTYPES_FIRSTPRIVATE:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 // HOST: @[[MAPTYPES_REDUCTION:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
 // HOST: @[[MAPTYPES_FROM:.offload_maptypes[0-9.]*]] = private {{.*}}constant [1 x i64] [i64 34]
 // HOST: @[[MAPTYPES_TO:.offload_maptypes[0-9.]*]] = private {{.*}}constant [1 x i64] [i64 33]
 // HOST: @[[MAPTYPES_ALLOC:.offload_maptypes[0-9.]*]] = private {{.*}}constant [1 x i64] [i64 32]
-// HOST: @[[MAPTYPES_ARRAY_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
-// HOST: @[[MAPTYPES_ARRAY_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 33, i64 33]
-// HOST-INT128: @[[MAPTYPES_INT128_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35]
-// HOST-INT128: @[[MAPTYPES_INT128_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 34, i64 34]
+// HOST: @[[MAPTYPES_ARRAY_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 35, i64 35, i64 35]
+// HOST: @[[MAPTYPES_ARRAY_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 33, i64 33, i64 33]
+// HOST-INT128: @[[MAPTYPES_INT128_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 35, i64 35, i64 35]
+// HOST-INT128: @[[MAPTYPES_INT128_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 34, i64 34, i64 34]
 //
 // CHECK: @.omp_offloading.entry_name{{[0-9.]*}} = {{.*}} c"[[OFFLOAD_PRIVATE:__omp_offloading_[^"\\]*mapWithPrivate[^"\\]*]]\00"
 // CHECK: @.omp_offloading.entry_name{{[0-9.]*}} = {{.*}} c"[[OFFLOAD_FIRSTPRIVATE:__omp_offloading_[^"\\]*mapWithFirstprivate[^"\\]*]]\00"
@@ -42,9 +43,7 @@
 // INT128: @.omp_offloading.entry_name{{[0-9.]*}} = {{.*}} c"[[OFFLOAD_INT128_R1:__omp_offloading_[^"\\]*mapInt128[^"\\]*]]\00"
 
 // HOST: define {{.*}}mapWithPrivate
-// HOST: call {{.*}} @.[[OFFLOAD_PRIVATE]].region_id
-// HOST-NOT: offload_maptypes
-// HOST-SAME: {{$}}
+// HOST: call {{.*}} @.[[OFFLOAD_PRIVATE]].region_id{{.*}} @[[MAPTYPES_PRIVATE]]
 //
 // CHECK: define {{.*}} void @[[OFFLOAD_PRIVATE]]()
 // CHECK: call void ({{.*}}@[[OUTLINE_PRIVATE:.omp_outlined.[.0-9]*]]
diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp
index 93b28f8c43645..000160f9c0a35 100644
--- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp
@@ -153,7 +153,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i8** null, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -164,7 +164,7 @@ int bar(int n){
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.+}}, i32 1024, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, {{.+}}, i8** null, i32 1024, i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -192,7 +192,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -213,7 +213,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -231,7 +231,7 @@ int bar(int n){
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 20, i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i8** null, i32 20, i32 0)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -253,7 +253,7 @@ int bar(int n){
 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = sext i16 [[T]] to i32
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i8** null, i32 [[TEAMS]], i32 0)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
index 2432d6b3ad6e6..d546ad747455c 100644
--- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
@@ -153,7 +153,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TL:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i32 0, i32 [[TL]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i8** null, i32 0, i32 [[TL]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -164,7 +164,7 @@ int bar(int n){
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.+}}, i32 0, i32 1024)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, {{.+}}, i8** null, i32 0, i32 1024)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -201,7 +201,7 @@ int bar(int n){
 // CHECK:       [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR1]], align
 // CHECK:       [[TL:%.+]] = load i32, i32* [[CAPE_ADDR2]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 [[TEAMS]], i32 [[TL]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i8** null, i32 [[TEAMS]], i32 [[TL]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -222,7 +222,7 @@ int bar(int n){
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 // CHECK:       [[TL:%.+]] = load i32, i32* [[CAPE_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 0, i32 [[TL]])
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i8** null, i32 0, i32 [[TL]])
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -240,7 +240,7 @@ int bar(int n){
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 0, i32 20)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i8** null, i32 0, i32 20)
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
@@ -262,7 +262,7 @@ int bar(int n){
 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
 // CHECK:       [[TEAMS:%.+]] = sext i16 [[T]] to i32
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 1024)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i8** null, i32 [[TEAMS]], i32 1024)
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
 //
diff --git a/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
index 829a99bd8eb7f..0473e5aceb4a9 100644
--- a/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp
index fd5a62a8067c7..5d569f18ce6b6 100644
--- a/clang/test/OpenMP/target_update_codegen.cpp
+++ b/clang/test/OpenMP/target_update_codegen.cpp
@@ -46,7 +46,7 @@ void foo(int arg) {
   float lb[arg];
 
   // Region 00
-  // CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update_nowait_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
   // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -71,7 +71,7 @@ void foo(int arg) {
   // Region 02
   // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CK1: [[IFTHEN]]
-  // CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -94,7 +94,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 03
-  // CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -116,7 +116,7 @@ void foo(int arg) {
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+  // CK1-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -185,7 +185,7 @@ int bar(int arg){
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_update(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK2-DAG: call void @__tgt_target_data_update_mapper(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
@@ -316,7 +316,7 @@ void device_side_scan(int arg) {
 // CK5-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK5-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -358,7 +358,7 @@ void lvalue(int *B, int l, int e) {
 // CK6-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK6-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK6-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK6-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK6-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -403,7 +403,7 @@ void lvalue(int *B, int l, int e) {
 // CK7-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK7-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK7-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK7-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK7-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -454,7 +454,7 @@ void lvalue(int *B, int l, int e) {
 // CK8-LABEL: lvalue
 void lvalue(int **B, int l, int e) {
 
-  // CK8-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}], [2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK8-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}], [2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK8-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK8-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -509,7 +509,7 @@ struct S {
 // CK9-LABEL: lvalue
 void lvalue(struct S *s, int l, int e) {
 
-  // CK9-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK9-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK9-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK9-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK9-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]]
@@ -559,7 +559,7 @@ struct S {
 // CK10-LABEL: lvalue
 void lvalue(struct S *s, int l, int e) {
 
-  // CK10-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK10-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK10-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK10-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK10-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]]
@@ -609,7 +609,7 @@ struct S {
 // CK11-LABEL: lvalue
 void lvalue(struct S *s, int l, int e) {
 
-  // CK11-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK11-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK11-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK11-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK11-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]]
@@ -661,7 +661,7 @@ struct S {
 // CK12-LABEL: lvalue
 void lvalue(struct S *s, int l, int e) {
 
-  // CK12-DAG: call void @__tgt_target_data_update(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK12-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK12-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK12-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK12-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]]
@@ -719,7 +719,7 @@ void lvalue(struct S *s, int l, int e) {
 // CK13-LABEL: lvalue
 void lvalue(int **BB, int a, int b) {
 
-  // CK13-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK13-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK13-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK13-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -778,7 +778,7 @@ struct SSB {
   // CK14-LABEL: define {{.+}}foo
   void foo() {
 
-    // CK14-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+    // CK14-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
     // CK14-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
     // CK14-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
     // CK14-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]]
@@ -845,7 +845,7 @@ struct SSA {
 //CK-15-LABEL: lvalue_member
 void lvalue_member(SSA *sap) {
 
-  // CK15-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK15-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK15-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK15-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK15-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]]
@@ -905,7 +905,7 @@ void lvalue_member(SSA *sap) {
 //CK16-LABEL: lvalue_find_base
 void lvalue_find_base(float *f, int *i) {
 
-  // CK16-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK16-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK16-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK16-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -955,7 +955,7 @@ struct SSA {
 //CK17-LABEL: lvalue_find_base
 void lvalue_find_base(float **f, SSA *sa) {
 
-  // CK17-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+  // CK17-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK17-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK17-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
@@ -1008,7 +1008,7 @@ void lvalue_find_base(float **f, SSA *sa) {
 //CK18-LABEL: array_shaping
 void array_shaping(float *f, int sa) {
 
-  // CK18-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}})
+  // CK18-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}}, i8** null)
   // CK18-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK18-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK18-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
@@ -1033,7 +1033,7 @@ void array_shaping(float *f, int sa) {
   // CK18-32-DAG: [[SZ2]] = mul nuw i32 12, %{{.+}}
   #pragma omp target update to(([3][sa][4])f)
   sa = 1;
-  // CK18-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}})
+  // CK18-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}, i8** null)
   // CK18-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK18-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
   // CK18-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
diff --git a/clang/test/OpenMP/target_update_depend_codegen.cpp b/clang/test/OpenMP/target_update_depend_codegen.cpp
index 5c61e058cf6db..7dec72c9b4d43 100644
--- a/clang/test/OpenMP/target_update_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_update_depend_codegen.cpp
@@ -64,7 +64,7 @@ void foo(int arg) {
   // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]],
   // CK1: [[DEV1:%.+]] = load i32, i32* %{{.+}}
   // CK1: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]])
   // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0
   // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
@@ -130,7 +130,7 @@ void foo(int arg) {
   // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1
   // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8
   // CK1: store i8 [[IF]], i8* [[IF_DEVICE]],
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0
@@ -213,7 +213,7 @@ void foo(int arg) {
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0
   // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[S]], i32 0, i32 0
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{64|36}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{72|40}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
@@ -298,7 +298,7 @@ void foo(int arg) {
   // CK1: store double* %{{.+}}, double** [[P1_BC]],
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
-  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|52}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{104|60}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
   // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1
@@ -373,55 +373,61 @@ void foo(int arg) {
 }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_update_nowait_mapper(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_update_mapper(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
-
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [1 x i64]*, [1 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [1 x i8*]*, [1 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i64]** [[S_PRIV]], [1 x i8*]** [[M_PRIV]])
 // CK1-NOT: __tgt_target_data_end
 // CK1: ret i32 0
 // CK1: }
 
 // CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias %1)
-// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}})
+// CK1-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** [[GEPM:%.+]])
 // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
-
+// CK1-DAG: [[GEPM]] = getelementptr inbounds {{.+}}[[M:%[^,]+]]
 // CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%[^,]+]],
 // CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%[^,]+]],
 // CK1-DAG: [[S]] = load [2 x i64]*, [2 x i64]** [[S_PRIV:%[^,]+]],
-// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i64]** [[S_PRIV]])
+// CK1-DAG: [[M]] = load [2 x i8*]*, [2 x i8*]** [[M_PRIV:%[^,]+]],
+// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i64]** [[S_PRIV]], [2 x i8*]** [[M_PRIV]])
 // CK1-NOT: __tgt_target_data_end
 // CK1: ret i32 0
 // CK1: }
diff --git a/clang/test/OpenMP/target_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_uses_allocators_codegen.cpp
index 213e7c9a8778f..5e645aab67c19 100644
--- a/clang/test/OpenMP/target_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_uses_allocators_codegen.cpp
@@ -65,7 +65,7 @@ void foo() {
   omp_alloctrait_t traits[10];
   omp_allocator_handle_t my_allocator;
 
-// CHECK: [[RES:%.+]] = call i32 @__tgt_target(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0))
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_mapper(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i8** null)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
 // CHECK: [[FAILED]]:
diff --git a/clang/test/OpenMP/task_default_messages.cpp b/clang/test/OpenMP/task_default_messages.cpp
index 4826c253aa043..8b6809ee05d56 100644
--- a/clang/test/OpenMP/task_default_messages.cpp
+++ b/clang/test/OpenMP/task_default_messages.cpp
@@ -2,15 +2,24 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp task default                          // expected-error {{expected '(' after 'default'}}
-#pragma omp task default(                         // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-#pragma omp task default()                        // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp task default(                         // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp task default()                        // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
 #pragma omp task default(none                     // expected-error {{expected ')'}} expected-note {{to match this '('}}
 #pragma omp task default(shared), default(shared) // expected-error {{directive '#pragma omp task' cannot contain more than one 'default' clause}}
-#pragma omp task default(x)                       // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp task default(x)                       // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
 #pragma omp task default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -19,5 +28,13 @@ int main(int argc, char **argv) {
 #pragma omp task default(none) // expected-note {{explicit data sharing attribute requested here}}
 #pragma omp task default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifdef OMP51
+#pragma omp task default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
   return 0;
 }
diff --git a/clang/test/OpenMP/task_messages.cpp b/clang/test/OpenMP/task_messages.cpp
index 8b3183e0bd93e..13cbfb6c45693 100644
--- a/clang/test/OpenMP/task_messages.cpp
+++ b/clang/test/OpenMP/task_messages.cpp
@@ -4,6 +4,9 @@
 // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-version=45 -fopenmp-simd -ferror-limit 200 -std=c++11 -o - %s -Wuninitialized
 // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=50 -fopenmp-simd -ferror-limit 200 -std=c++11 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -std=c++11 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s -Wuninitialized
+
 void xxx(int argc) {
   int x; // expected-note {{initialize the variable 'x' to silence this warning}}
 #pragma omp task
@@ -16,6 +19,10 @@ void foo() {
 }
 
 typedef unsigned long omp_event_handle_t;
+namespace {
+static int y = 0;
+}
+static int x = 0;
 
 #pragma omp task // expected-error {{unexpected OpenMP directive '#pragma omp task'}}
 
@@ -52,6 +59,15 @@ int foo() {
 #pragma omp task default(none) // expected-note 2 {{explicit data sharing attribute requested here}}
 #pragma omp task default(shared)
   ++a; // expected-error 2 {{variable 'a' must have explicitly specified data sharing attributes}}
+#ifdef OMP51
+#pragma omp task default(firstprivate) // expected-note 4 {{explicit data sharing attribute requested here}}
+#pragma omp task
+  {
+    ++x; // expected-error 2 {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error 2 {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
 #pragma omp task default(none) // expected-note 2 {{explicit data sharing attribute requested here}}
 #pragma omp task
   // expected-error@+1 {{calling a private constructor of class 'S'}}
diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp
index f672ab17fd59a..31ecb80b1a20c 100644
--- a/clang/test/OpenMP/taskgroup_codegen.cpp
+++ b/clang/test/OpenMP/taskgroup_codegen.cpp
@@ -16,7 +16,7 @@
 
 // CHECK:       define {{.*}}void [[FOO:@.+]]()
 
-void foo() {}
+void foo() { extern void mayThrow(); mayThrow(); }
 
 // CHECK-LABEL: @main
 // TERM_DEBUG-LABEL: @main
diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp
index 54e0f6ea29eb4..7fed40226739d 100644
--- a/clang/test/OpenMP/teams_codegen.cpp
+++ b/clang/test/OpenMP/teams_codegen.cpp
@@ -29,7 +29,7 @@ int teams_argument_global_local(int a){
   int la = 23;
   float lc = 25.0;
 
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
   #pragma omp target
   #pragma omp teams
@@ -37,7 +37,7 @@ int teams_argument_global_local(int a){
     ++comp;
   }
 
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
   #pragma omp target
   {{{
@@ -47,7 +47,7 @@ int teams_argument_global_local(int a){
     }
   }}}
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 [[NT:%[^,]+]], i32 0)
   // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
 
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
@@ -57,7 +57,7 @@ int teams_argument_global_local(int a){
     ++comp;
   }
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 [[NT:%[^,]+]])
   // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
 
   // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
@@ -67,7 +67,7 @@ int teams_argument_global_local(int a){
     ++comp;
   }
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK1-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
   // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
@@ -86,7 +86,7 @@ int teams_argument_global_local(int a){
     ++comp;
   }
 
-  // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK1-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1
   // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
@@ -141,7 +141,7 @@ int teams_template_arg(void) {
   SS la;
   SS lb;
 
-  // CK2-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK2-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0)
 
@@ -157,7 +157,7 @@ int teams_template_arg(void) {
     ++comp;
   }
 
-  // CK2-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+  // CK2-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
 
   // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
   // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0),
@@ -205,7 +205,7 @@ struct SS{
   int foo(void) {
     int comp = 1;
 
-    // CK3-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+    // CK3-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 [[NT:%[^,]+]], i32 123)
 
     // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
     // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
@@ -218,7 +218,7 @@ struct SS{
       ++comp;
     }
 
-    // CK3-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+    // CK3-DAG: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 456, i32 [[TL:%[^,]+]])
 
     // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
     // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
diff --git a/clang/test/OpenMP/teams_default_messages.cpp b/clang/test/OpenMP/teams_default_messages.cpp
index a025050406000..b117ef4948a0f 100644
--- a/clang/test/OpenMP/teams_default_messages.cpp
+++ b/clang/test/OpenMP/teams_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams default // expected-error {{expected '(' after 'default'}}
   foo();
   #pragma omp target
-  #pragma omp teams default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
   #pragma omp target
-  #pragma omp teams default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
   #pragma omp target
   #pragma omp teams default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
   #pragma omp teams default (shared), default(shared) // expected-error {{directive '#pragma omp teams' cannot contain more than one 'default' clause}}
   foo();
   #pragma omp target
-  #pragma omp teams default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
   #pragma omp target
@@ -32,5 +41,14 @@ int main(int argc, char **argv) {
   #pragma omp teams default(none) // expected-note {{explicit data sharing attribute requested here}}
   #pragma omp parallel default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp
index a87a40eed6440..fa12fc09f8144 100644
--- a/clang/test/OpenMP/teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_codegen.cpp
@@ -34,7 +34,7 @@ int teams_argument_global(int n) {
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
   // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
   #pragma omp target
@@ -43,7 +43,7 @@ int teams_argument_global(int n) {
     a[i] = 0;
   }
 
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   {{{
@@ -110,7 +110,7 @@ int teams_local_arg(void) {
   int n = 100;
   int a[n];
 
-  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK2: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   #pragma omp teams distribute
@@ -158,7 +158,7 @@ struct SS{
   // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK3: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
     #pragma omp target
     #pragma omp teams distribute
@@ -230,7 +230,7 @@ int main (int argc, char **argv) {
 }
 
 // CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CK4: call void @[[OFFL1:.+]]({{.+}})
 // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK4:  ret
@@ -245,7 +245,7 @@ int main (int argc, char **argv) {
 // CK4: ret void
 
 // CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 {{.+}}, i32 {{.+}})
 // CK4: call void @[[OFFLT:.+]]({{.+}})
 // CK4:  ret
 // CK4-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
index 716e766929134..b0b5a659305b4 100644
--- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
     
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute collapse(2)
@@ -107,7 +107,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -123,7 +123,7 @@ int main (int argc, char **argv) {
 // CK2: call void @__kmpc_for_static_fini(
 // CK2: ret void
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_default_messages.cpp b/clang/test/OpenMP/teams_distribute_default_messages.cpp
index 7f000208303b7..1d5fd40c53a6b 100644
--- a/clang/test/OpenMP/teams_distribute_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +30,21 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams distribute default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
index 97ae3871111da..9e18cbc4cd43f 100644
--- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
@@ -26,21 +26,21 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute dist_schedule(static, X/2)
@@ -145,11 +145,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -182,11 +182,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
index a2e6533ba7a46..e6438a41256ea 100644
--- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
@@ -86,7 +86,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target
@@ -167,7 +167,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -261,7 +261,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
index 5d54ba774b77b..db6740fdbb473 100644
--- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
@@ -69,7 +69,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -192,7 +192,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -291,7 +291,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index 46dd7db2eac5c..679fd549baf85 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -33,7 +33,7 @@ int teams_argument_global(int n){
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
   // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
   #pragma omp target
@@ -43,7 +43,7 @@ int teams_argument_global(int n){
     #pragma omp cancel for
   }
 
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   {{{
@@ -112,7 +112,7 @@ int teams_local_arg(void) {
   int n = 100;
   int a[n];
 
-  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK2: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   #pragma omp teams distribute parallel for
@@ -161,7 +161,7 @@ struct SS{
   // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK3: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
   // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
     #pragma omp target
     #pragma omp teams distribute parallel for
@@ -234,7 +234,7 @@ int main (int argc, char **argv) {
 }
 
 // CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CK4: call void @[[OFFL1:.+]]({{.+}})
 // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK4:  ret
@@ -250,7 +250,7 @@ int main (int argc, char **argv) {
 // CK4: ret void
 
 // CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 {{.+}}, i32 {{.+}})
 // CK4: call void @[[OFFLT:.+]]({{.+}})
 // CK4:  ret
 // CK4-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
index 11a97868ef725..49a8c8bd08a40 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for collapse(2)
@@ -109,7 +109,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -133,7 +133,7 @@ int main (int argc, char **argv) {
 
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
index 399025d8a4f1f..f96f7aef89231 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
@@ -47,7 +47,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -115,7 +115,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(
+// CHECK: call i32 @__tgt_target_teams_mapper(
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -161,7 +161,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(
+// CHECK: call i32 @__tgt_target_teams_mapper(
 // CHECK: call void @[[TOFFL1:.+]](
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
index 2c46623985070..3a414543be806 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute parallel for default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute parallel for default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +30,21 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute parallel for default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute parallel for' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute parallel for default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams distribute parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
index 03276e4b5ecbe..b0f66e6806867 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -26,21 +26,21 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for dist_schedule(static, X/2)
@@ -166,11 +166,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -221,11 +221,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
index 6f33c4f754a2b..6cd1f7f435ecf 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -86,7 +86,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target
@@ -191,7 +191,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -351,7 +351,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
index f4119fc47d3b9..8310189786a21 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
@@ -22,9 +22,9 @@ int Arg;
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
 #pragma omp target
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp teams distribute parallel for
   for(int i = 0 ; i < 100; i++) {}
@@ -82,11 +82,11 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
 #pragma omp target
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
index 3dfb51320ddc4..d0b748f279bc0 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -69,7 +69,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -247,7 +247,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -428,7 +428,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
index 0f93fe219aae1..94225b8dfb6b7 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
@@ -42,9 +42,9 @@ int tmain() {
 int main() {
   S s(0);
   char a = s;
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 // CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
 // CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
@@ -78,16 +78,16 @@ int main() {
 
 // tmain 5
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
 
 // tmain 1
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
 
 // CHECK: define internal void [[T_OFFLOADING_FUN_0]](
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
index 5fc7ee12eb0ad..f421ab30b2df1 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -177,7 +177,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -254,7 +254,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
index 0b7f3b2d8c62f..0f9c01e9f5d1b 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -46,9 +46,9 @@ int main() {
   return tmain();
 }
 
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL2:@.+]]()
 // CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
 // CHECK: ret i32 [[CALL_RET]]
@@ -78,7 +78,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL3:@.+]]()
 
 // CHECK: define{{.+}} [[OFFL3]]()
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
index 34cc2874f6f3e..382c4b6d1c95a 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
@@ -47,7 +47,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -172,7 +172,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -265,7 +265,7 @@ int main() {
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
index f095296dab5c5..d7cf485a8a76c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
@@ -33,21 +33,21 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for schedule(static, X/2)
@@ -55,7 +55,7 @@ struct SS{
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL4:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for schedule(dynamic)
@@ -63,7 +63,7 @@ struct SS{
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL5:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for schedule(dynamic, X/2)
@@ -248,15 +248,15 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL5:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -340,15 +340,15 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT5:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index e1e0d52292afa..4118739226293 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -34,7 +34,7 @@ int teams_argument_global(int n){
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
   // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
   #pragma omp target
@@ -44,7 +44,7 @@ int teams_argument_global(int n){
   }
 
   int i;
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
   // CK1: call void @[[OFFL2:.+]](
   #pragma omp target
   {{{
@@ -116,7 +116,7 @@ int teams_local_arg(void) {
   int n = 100;
   int a[n], i;
 
-  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK2: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
   // CK2: call void @[[OFFL1:.+]](
   #pragma omp target
   #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
@@ -169,7 +169,7 @@ struct SS{
   // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
     int i;
-  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+  // CK3: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{[^,]+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
   // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
     #pragma omp target
     #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
@@ -245,7 +245,7 @@ int main (int argc, char **argv) {
 }
 
 // CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 // CK4: call void @[[OFFL1:.+]]({{.+}})
 // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK4:  ret
@@ -261,7 +261,7 @@ int main (int argc, char **argv) {
 // CK4: ret void
 
 // CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 // CK4: call void @[[OFFLT:.+]]({{.+}})
 // CK4:  ret
 // CK4-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
index 24bd31e07ba6d..10036e6553085 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd collapse(2)
@@ -112,7 +112,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -136,7 +136,7 @@ int main (int argc, char **argv) {
 
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
index 93017a8233ffe..ce7f35b479592 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -fopenmp-version=51 -DOMP51
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -fopenmp-version=51 -DOMP51
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for simd default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for simd default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute parallel for simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +30,20 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute parallel for simd default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for simd default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifdef OMP51
+#pragma omp teams distribute parallel for simd default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 4f11dc67437b8..5a5e370cc63af 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -26,21 +26,21 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd dist_schedule(static, X/2)
@@ -169,11 +169,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -224,11 +224,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index c92f9be7ab74e..c694466d2fec4 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -86,7 +86,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target
@@ -194,7 +194,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -354,7 +354,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 // CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
index d5d0d8abeaae8..2f37f23775804 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -28,9 +28,9 @@ int Arg;
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
 #pragma omp target
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp teams distribute parallel for simd
   for(int i = 0 ; i < 100; i++) {}
@@ -88,11 +88,11 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
 #pragma omp target
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index 1b02d0c7394c0..26eca3545cb4c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -69,7 +69,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -254,7 +254,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -438,7 +438,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
index 4faa99e2ee362..a4b801a729af3 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -42,9 +42,9 @@ int tmain() {
 int main() {
   S s(0);
   char a = s;
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 // CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
 // CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
@@ -78,16 +78,16 @@ int main() {
 
 // tmain 5
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
 
 // tmain 1
 // CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
 
 // CHECK: define internal void [[T_OFFLOADING_FUN_0]](
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
index b2c8c22b84d80..71512c74b3f84 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -180,7 +180,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -257,7 +257,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
index 447a1a60109c2..c876fa9565317 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -46,9 +46,9 @@ int main() {
   return tmain();
 }
 
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL2:@.+]]()
 // CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
 // CHECK: ret i32 [[CALL_RET]]
@@ -78,7 +78,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call {{.*}}@__tgt_target_teams_mapper({{.+}})
 // CHECK: call void [[OFFL3:@.+]]()
 
 // CHECK: define{{.+}} [[OFFL3]]()
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
index f97f0a050c346..6866ea9f13107 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -47,7 +47,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -175,7 +175,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -268,7 +268,7 @@ int main() {
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
index 4662c46fcce34..4be521f1afd85 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -34,21 +34,21 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd schedule(static, X/2)
@@ -56,7 +56,7 @@ struct SS{
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL4:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd schedule(dynamic)
@@ -64,7 +64,7 @@ struct SS{
       a[i] = (T)0;
     }
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL5:.+]](
     #pragma omp target
     #pragma omp teams distribute parallel for simd schedule(dynamic, X/2)
@@ -255,15 +255,15 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL5:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -348,15 +348,15 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT4:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT5:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp
index 8880e2f517698..0228d1504e9df 100644
--- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -154,7 +154,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -199,7 +199,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
index 019306c6106d8..73042b1eb345d 100644
--- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
@@ -47,7 +47,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -128,7 +128,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 0)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -179,7 +179,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index 0ad2c27e562b0..ed7c185f2e76c 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -36,7 +36,7 @@ int teams_argument_global(int n) {
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
   // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 1)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 {{.+}}, i32 1)
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
   #pragma omp target
@@ -45,7 +45,7 @@ int teams_argument_global(int n) {
     a[i] = 0;
   }
 
-  // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+  // CK1: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
   // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   {{{
@@ -116,7 +116,7 @@ int teams_local_arg(void) {
   int n = 100;
   int a[n];
 
-  // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+  // CK2: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
   // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
   #pragma omp target
   #pragma omp teams distribute simd
@@ -177,7 +177,7 @@ struct SS{
   // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+  // CK3: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* %{{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
   // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
     #pragma omp target
 #ifdef OMP5
@@ -275,8 +275,8 @@ int main (int argc, char **argv) {
 }
 
 // CK4:  define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// OMP4_45:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
-// OMP4_50:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// OMP4_45:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
+// OMP4_50:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CK4: call void @[[OFFL1:.+]]({{.+}})
 // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK4:  ret
@@ -292,7 +292,7 @@ int main (int argc, char **argv) {
 // CK4: ret void
 
 // CK4:  define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK4:   call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 1)
+// CK4:   call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 {{.+}}, i32 1)
 // CK4: call void @[[OFFLT:.+]]({{.+}})
 // CK4:  ret
 // CK4-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
index 41c494a562d70..0622a32cdb372 100644
--- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
@@ -26,7 +26,7 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
     
-    // CK1: call i32 @__tgt_target_teams(
+    // CK1: call i32 @__tgt_target_teams_mapper(
     // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute simd collapse(2)
@@ -109,7 +109,7 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -125,7 +125,7 @@ int main (int argc, char **argv) {
 // CK2: call void @__kmpc_for_static_fini(
 // CK2: ret void
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp
index 2775210ae048f..11f5d1cd1fc8f 100644
--- a/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp
@@ -1,18 +1,23 @@
-// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -fopenmp-version=51
 
-// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -fopenmp-version=51
 
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute simd default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute simd default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute simd default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +26,22 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute simd default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute simd' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute simd default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#pragma omp target
+#pragma omp teams distribute simd default(firstprivate) // expected-note {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++)
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+
+#pragma omp target
+#pragma omp teams distribute simd default(firstprivate) // expected-note {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++)
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
index 8937a44f89e80..3bfd365af3a06 100644
--- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
@@ -26,21 +26,21 @@ struct SS{
   // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
   int foo(void) {
 
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL1:.+]](
     #pragma omp target
     #pragma omp teams distribute simd
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL2:.+]](
     #pragma omp target
     #pragma omp teams distribute simd dist_schedule(static)
     for(int i = 0; i < X; i++) {
       a[i] = (T)0;
     }
-  // CK1: call i32 @__tgt_target_teams(
+  // CK1: call i32 @__tgt_target_teams_mapper(
   // CK1: call void @[[OFFL3:.+]](
     #pragma omp target
     #pragma omp teams distribute simd dist_schedule(static, X/2)
@@ -146,11 +146,11 @@ int main (int argc, char **argv) {
 }
 
 // CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFL3:.+]]({{.+}})
 // CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
 // CK2: ret
@@ -183,11 +183,11 @@ int main (int argc, char **argv) {
 // CK2: ret void
 
 // CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT1:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT2:.+]]({{.+}})
-// CK2: call i32 @__tgt_target_teams(
+// CK2: call i32 @__tgt_target_teams_mapper(
 // CK2: call void @[[OFFLT3:.+]]({{.+}})
 // CK2:  ret
 // CK2-NEXT: }
diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
index 293af4296cea5..20c7514dbd9b6 100644
--- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
@@ -86,7 +86,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
     // LAMBDA:  ret
 #pragma omp target
@@ -167,7 +167,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -261,7 +261,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
index 9f7a186866570..cc899cf3195a8 100644
--- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
@@ -69,7 +69,7 @@ int main() {
   [&]() {
     static float sfvar;
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+    // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(
     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
 
     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
@@ -194,7 +194,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} @main()
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
 // CHECK: ret
 
@@ -294,7 +294,7 @@ int main() {
 // CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(
 // CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
 // CHECK: ret
 
diff --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
index 10ad66ad31218..1c638170296ba 100644
--- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
@@ -85,7 +85,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -155,7 +155,7 @@ int main() {
 }
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]]()
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -200,7 +200,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 0,
 // CHECK: call void @[[TOFFL1:.+]]()
 // CHECK:  ret
 
diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
index 2137913bcdd9d..a3f18db856bc1 100644
--- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
@@ -47,7 +47,7 @@ int main() {
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
   [&]() {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-    // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+    // LAMBDA: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
     // LAMBDA: call void @[[LOFFL1:.+]](
     // LAMBDA:  ret
 #pragma omp target
@@ -128,7 +128,7 @@ int main() {
 // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
 
 // CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1)
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i32 0, i32 1)
 // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
 // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
 // CHECK:  ret
@@ -179,7 +179,7 @@ int main() {
 
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
-// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call i32 @__tgt_target_teams_mapper(i64 -1, i8* @{{[^,]+}}, i32 1,
 // CHECK: call void @[[TOFFL1:.+]]({{.+}})
 // CHECK:  ret
 
diff --git a/clang/test/Parser/cxx-template-decl.cpp b/clang/test/Parser/cxx-template-decl.cpp
index 24cc13cde91fa..64e7ca921f575 100644
--- a/clang/test/Parser/cxx-template-decl.cpp
+++ b/clang/test/Parser/cxx-template-decl.cpp
@@ -286,17 +286,3 @@ namespace PR45239 {
   template int b;
   template auto f() -> b<0>; // expected-error +{{}}
 }
-
-namespace NoCrashOnNullNNSTypoCorrection {
-
-int AddObservation(); // expected-note {{declared here}}
-
-template  // expected-note {{template parameter is declared here}}
-class UsingImpl {};
-class AddObservation {
-  using Using =
-    UsingImpl; // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}} \
-                                               expected-error {{template argument for template type parameter must be a type}}
-};
-
-}
diff --git a/clang/test/Parser/lambda-misplaced-capture-default.cpp b/clang/test/Parser/lambda-misplaced-capture-default.cpp
new file mode 100644
index 0000000000000..d65b875102da7
--- /dev/null
+++ b/clang/test/Parser/lambda-misplaced-capture-default.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-unused-value -fsyntax-only -verify %s
+
+namespace misplaced_capture_default {
+void Test() {
+  int i = 0;
+  [&, i, &] {};   // expected-error {{expected variable name or 'this' in lambda capture list}}
+  [&, i, = ] {};  // expected-error {{expected variable name or 'this' in lambda capture list}}
+  [=, &i, &] {};  // expected-error {{expected variable name or 'this' in lambda capture list}}
+  [=, &i, = ] {}; // expected-error {{expected variable name or 'this' in lambda capture list}}
+
+  [i, &] {};   // expected-error {{capture default must be first}}
+  [i, = ] {};  // expected-error {{capture default must be first}}
+  [i, = x] {}; // expected-error {{expected variable name or 'this' in lambda capture list}}
+  [=, &i] {};  // ok
+  [&, &i] {};  // expected-error {{'&' cannot precede a capture when the capture default is '&'}}
+  [&x = i] {}; // ok
+  [=, &x = i] {};  // ok
+  [x = &i] {};     // ok
+  [=, &x = &i] {}; // expected-error {{non-const lvalue reference to type 'int *' cannot bind to a temporary of type 'int *'}}
+  [&, this] {}; // expected-error {{'this' cannot be captured in this context}}
+
+  [i, &, x = 2] {}; // expected-error {{capture default must be first}}
+  [i, =, x = 2] {}; // expected-error {{capture default must be first}}
+}
+} // namespace misplaced_capture_default
+
+namespace misplaced_capture_default_pack {
+template  void Test(Args... args) {
+  [&, args...] {};         // ok
+  [args..., &] {};         // expected-error {{capture default must be first}}
+  [=, &args...] {};        // ok
+  [&, ... xs = &args] {};  // ok
+  [&, ... xs = &] {};      // expected-error {{expected expression}}
+  [... xs = &] {};         // expected-error {{expected expression}}
+  [... xs = &args, = ] {}; // expected-error {{capture default must be first}}
+  [... xs = &args, &] {};  // expected-error {{capture default must be first}}
+}
+} // namespace misplaced_capture_default_pack
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index bef145930697f..905a77785a9d8 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -44,6 +44,12 @@
 // CHECK-NOT: __ARM_BF16_FORMAT_ALTERNATIVE 1
 // CHECK-NOT: __ARM_FEATURE_BF16 1
 // CHECK-NOT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 0
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 128
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 256
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 512
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 1024
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 2048
 
 // RUN: %clang -target aarch64_be-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-BIGENDIAN
 // CHECK-BIGENDIAN: __ARM_BIG_ENDIAN 1
@@ -112,6 +118,24 @@
 // CHECK-SVE-F64MM: __ARM_FEATURE_SVE 1
 // CHECK-SVE-F64MM: __ARM_FEATURE_SVE_MATMUL_FP64 1
 
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.5-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_5 %s
+// CHECK-SVE-8_5-NOT: __ARM_FEATURE_SVE_BF16 1
+// CHECK-SVE-8_5-NOT: __ARM_FEATURE_SVE_MATMUL_FP32 1
+// CHECK-SVE-8_5-NOT: __ARM_FEATURE_SVE_MATMUL_INT8 1
+// CHECK-SVE-8_5: __ARM_FEATURE_SVE 1
+
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s
+// CHECK-SVE-8_6: __ARM_FEATURE_SVE 1
+// CHECK-SVE-8_6: __ARM_FEATURE_SVE_BF16 1
+// CHECK-SVE-8_6: __ARM_FEATURE_SVE_MATMUL_FP32 1
+// CHECK-SVE-8_6: __ARM_FEATURE_SVE_MATMUL_INT8 1
+
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve+noi8mm+nobf16+nof32mm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6-NOFEATURES %s
+// CHECK-SVE-8_6-NOFEATURES-NOT: __ARM_FEATURE_SVE_BF16 1
+// CHECK-SVE-8_6-NOFEATURES-NOT: __ARM_FEATURE_SVE_MATMUL_FP32 1
+// CHECK-SVE-8_6-NOFEATURES-NOT: __ARM_FEATURE_SVE_MATMUL_INT8 1
+// CHECK-SVE-8_6-NOFEATURES:     __ARM_FEATURE_SVE 1
+
 // The following tests may need to be revised in the future since
 // SVE2 is currently still part of Future Architecture Technologies
 // (https://developer.arm.com/docs/ddi0602/latest)
@@ -413,3 +437,17 @@
 // CHECK-BFLOAT: __ARM_FEATURE_BF16 1
 // CHECK-BFLOAT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1
 
+// ================== Check sve-vector-bits flag.
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=128 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-128 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=256 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-256 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=512 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-512 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=1024 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-1024 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s
+// NOTE: The __ARM_FEATURE_SVE_BITS feature macro is experimental until the
+// feature is complete.
+// CHECK-SVE-VECTOR-BITS-128: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 128
+// CHECK-SVE-VECTOR-BITS-256: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 256
+// CHECK-SVE-VECTOR-BITS-512: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 512
+// CHECK-SVE-VECTOR-BITS-1024: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 1024
+// CHECK-SVE-VECTOR-BITS-2048: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 2048
diff --git a/clang/test/Sema/attr-arm-sve-vector-bits.c b/clang/test/Sema/attr-arm-sve-vector-bits.c
new file mode 100644
index 0000000000000..48ca7d8fa8125
--- /dev/null
+++ b/clang/test/Sema/attr-arm-sve-vector-bits.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=128 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=256 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=512 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=1024 -fallow-half-arguments-and-returns %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=2048 -fallow-half-arguments-and-returns %s
+
+#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL
+
+typedef __SVInt8_t svint8_t;
+typedef __SVInt16_t svint16_t;
+typedef __SVInt32_t svint32_t;
+typedef __SVInt64_t svint64_t;
+typedef __SVUint8_t svuint8_t;
+typedef __SVUint16_t svuint16_t;
+typedef __SVUint32_t svuint32_t;
+typedef __SVUint64_t svuint64_t;
+typedef __SVFloat16_t svfloat16_t;
+typedef __SVFloat32_t svfloat32_t;
+typedef __SVFloat64_t svfloat64_t;
+
+#if defined(__ARM_FEATURE_SVE_BF16)
+typedef __SVBFloat16_t svbfloat16_t;
+#endif
+
+typedef __SVBool_t svbool_t;
+
+// Define valid fixed-width SVE types
+typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
+
+// Attribute must have a single argument
+typedef svint8_t no_argument __attribute__((arm_sve_vector_bits));         // expected-error {{'arm_sve_vector_bits' attribute takes one argument}}
+typedef svint8_t two_arguments __attribute__((arm_sve_vector_bits(2, 4))); // expected-error {{'arm_sve_vector_bits' attribute takes one argument}}
+
+// The number of SVE vector bits must be an integer constant expression
+typedef svint8_t non_int_size1 __attribute__((arm_sve_vector_bits(2.0)));   // expected-error {{'arm_sve_vector_bits' attribute requires an integer constant}}
+typedef svint8_t non_int_size2 __attribute__((arm_sve_vector_bits("256"))); // expected-error {{'arm_sve_vector_bits' attribute requires an integer constant}}
+
+typedef __clang_svint8x2_t svint8x2_t;
+typedef __clang_svfloat32x3_t svfloat32x3_t;
+
+// Attribute must be attached to a single SVE vector or predicate type.
+typedef void *badtype1 __attribute__((arm_sve_vector_bits(N)));         // expected-error {{'arm_sve_vector_bits' attribute applied to non-SVE type 'void *'}}
+typedef int badtype2 __attribute__((arm_sve_vector_bits(N)));           // expected-error {{'arm_sve_vector_bits' attribute applied to non-SVE type 'int'}}
+typedef float badtype3 __attribute__((arm_sve_vector_bits(N)));         // expected-error {{'arm_sve_vector_bits' attribute applied to non-SVE type 'float'}}
+typedef svint8x2_t badtype4 __attribute__((arm_sve_vector_bits(N)));    // expected-error {{'arm_sve_vector_bits' attribute applied to non-SVE type 'svint8x2_t' (aka '__clang_svint8x2_t')}}
+typedef svfloat32x3_t badtype5 __attribute__((arm_sve_vector_bits(N))); // expected-error {{'arm_sve_vector_bits' attribute applied to non-SVE type 'svfloat32x3_t' (aka '__clang_svfloat32x3_t')}}
diff --git a/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp b/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
index 9351b4ecb032d..88fcbd716ef45 100644
--- a/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
+++ b/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
@@ -1,19 +1,26 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: not %clang_cc1 %s -x hip -fcuda-is-device -o - -emit-llvm -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa -fsyntax-only \
+// RUN:   -verify=dev
+// RUN: %clang_cc1 %s -x hip -triple x86_64 -o - \
+// RUN:   -aux-triple amdgcn-amd-amdhsa -fsyntax-only \
+// RUN:   -verify=host
+
+// dev-no-diagnostics
 
 void test_host() {
   __UINT32_TYPE__ val32;
   __UINT64_TYPE__ val64;
 
-  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function
+  // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function}}
   val32 = __builtin_amdgcn_atomic_inc32(&val32, val32, __ATOMIC_SEQ_CST, "");
 
-  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function
+  // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function}}
   val64 = __builtin_amdgcn_atomic_inc64(&val64, val64, __ATOMIC_SEQ_CST, "");
 
-  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function
+  // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function}}
   val32 = __builtin_amdgcn_atomic_dec32(&val32, val32, __ATOMIC_SEQ_CST, "");
 
-  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function
+  // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function}}
   val64 = __builtin_amdgcn_atomic_dec64(&val64, val64, __ATOMIC_SEQ_CST, "");
 }
diff --git a/clang/test/Sema/builtins.c b/clang/test/Sema/builtins.c
index 1d41bcf9f0865..90c033e47cd17 100644
--- a/clang/test/Sema/builtins.c
+++ b/clang/test/Sema/builtins.c
@@ -281,6 +281,42 @@ void test21(const int *ptr) {
   __atomic_fetch_add(ptr, 1, 0);  // expected-error {{address argument to atomic operation must be a pointer to non-const type ('const int *' invalid)}}
 }
 
+void test_ei_i42i(_ExtInt(42) *ptr, int value) {
+  __sync_fetch_and_add(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}}
+  // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}}
+  __sync_nand_and_fetch(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}}
+}
+
+void test_ei_i64i(_ExtInt(64) *ptr, int value) {
+  __sync_fetch_and_add(ptr, value); // expect success
+  // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}}
+  __sync_nand_and_fetch(ptr, value); // expect success
+}
+
+void test_ei_ii42(int *ptr, _ExtInt(42) value) {
+  __sync_fetch_and_add(ptr, value); // expect success
+  // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}}
+  __sync_nand_and_fetch(ptr, value); // expect success
+}
+
+void test_ei_ii64(int *ptr, _ExtInt(64) value) {
+  __sync_fetch_and_add(ptr, value); // expect success
+  // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}}
+  __sync_nand_and_fetch(ptr, value); // expect success
+}
+
+void test_ei_i42i42(_ExtInt(42) *ptr, _ExtInt(42) value) {
+  __sync_fetch_and_add(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}}
+  // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}}
+  __sync_nand_and_fetch(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}}
+}
+
+void test_ei_i64i64(_ExtInt(64) *ptr, _ExtInt(64) value) {
+  __sync_fetch_and_add(ptr, value); // expect success
+  // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}}
+  __sync_nand_and_fetch(ptr, value); // expect success
+}
+
 void test22(void) {
   (void)__builtin_signbit(); // expected-error{{too few arguments to function call, expected 1, have 0}}
   (void)__builtin_signbit(1.0, 2.0, 3.0); // expected-error{{too many arguments to function call, expected 1, have 3}}
diff --git a/clang/test/Sema/return-non-void.c b/clang/test/Sema/return-non-void.c
new file mode 100644
index 0000000000000..f1ee3722af489
--- /dev/null
+++ b/clang/test/Sema/return-non-void.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -Wreturn-type -std=c99 -fsyntax-only -verify=c99 %s
+// RUN: %clang_cc1 -Wreturn-type -std=c90 -fsyntax-only -verify=c90 %s
+
+int foo(void) { return; } // c99-error {{non-void function 'foo' should return a value}}
+                          // c90-error@-1 {{non-void function 'foo' should return a value}}
diff --git a/clang/test/SemaCUDA/builtins.cu b/clang/test/SemaCUDA/builtins.cu
index 814fda2ac7d34..78a333e511a5d 100644
--- a/clang/test/SemaCUDA/builtins.cu
+++ b/clang/test/SemaCUDA/builtins.cu
@@ -7,10 +7,10 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
 // RUN:     -aux-triple nvptx64-unknown-cuda \
-// RUN:     -fsyntax-only -verify %s
+// RUN:     -fsyntax-only -verify=host %s
 // RUN: %clang_cc1 -triple nvptx64-unknown-cuda -fcuda-is-device \
 // RUN:     -aux-triple x86_64-unknown-unknown \
-// RUN:     -fsyntax-only -verify %s
+// RUN:     -fsyntax-only -verify=dev %s
 
 #if !(defined(__amd64__) && defined(__PTX__))
 #error "Expected to see preprocessor macros from both sides of compilation."
@@ -18,14 +18,13 @@
 
 void hf() {
   int x = __builtin_ia32_rdtsc();
-  int y = __nvvm_read_ptx_sreg_tid_x(); // expected-note  {{'__nvvm_read_ptx_sreg_tid_x' declared here}}
-  // expected-error@-1 {{reference to __device__ function '__nvvm_read_ptx_sreg_tid_x' in __host__ function}}
+  int y = __nvvm_read_ptx_sreg_tid_x();
+  // host-error@-1 {{reference to __device__ function '__nvvm_read_ptx_sreg_tid_x' in __host__ function}}
   x = __builtin_abs(1);
 }
 
 __attribute__((device)) void df() {
   int x = __nvvm_read_ptx_sreg_tid_x();
-  int y = __builtin_ia32_rdtsc(); // expected-error {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}}
-                                  // expected-note@20 {{'__builtin_ia32_rdtsc' declared here}}
+  int y = __builtin_ia32_rdtsc(); // dev-error {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}}
   x = __builtin_abs(1);
 }
diff --git a/clang/test/SemaCUDA/call-kernel-from-kernel.cu b/clang/test/SemaCUDA/call-kernel-from-kernel.cu
index c89037c52bff4..900efcef43b80 100644
--- a/clang/test/SemaCUDA/call-kernel-from-kernel.cu
+++ b/clang/test/SemaCUDA/call-kernel-from-kernel.cu
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
-// RUN:   -verify -fsyntax-only -verify-ignore-unexpected=note
+// RUN: %clang_cc1 %s --std=c++11 -triple nvptx -emit-llvm -o - \
+// RUN:   -verify -fcuda-is-device -fsyntax-only -verify-ignore-unexpected=note
 
 #include "Inputs/cuda.h"
 
diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu
index b9efd1c09e699..191268c9a5f14 100644
--- a/clang/test/SemaCUDA/function-overload.cu
+++ b/clang/test/SemaCUDA/function-overload.cu
@@ -1,8 +1,8 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=host,expected %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=dev,expected %s
 
 #include "Inputs/cuda.h"
 
@@ -75,37 +75,37 @@ extern "C" __host__ __device__ int chhd2() { return 0; }
 
 // Helper functions to verify calling restrictions.
 __device__ DeviceReturnTy d() { return DeviceReturnTy(); }
-// expected-note@-1 1+ {{'d' declared here}}
+// host-note@-1 1+ {{'d' declared here}}
 // expected-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}}
 // expected-note@-3 0+ {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
 
 __host__ HostReturnTy h() { return HostReturnTy(); }
-// expected-note@-1 1+ {{'h' declared here}}
+// dev-note@-1 1+ {{'h' declared here}}
 // expected-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}}
 // expected-note@-3 0+ {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
 // expected-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}}
 
 __global__ void g() {}
-// expected-note@-1 1+ {{'g' declared here}}
+// dev-note@-1 1+ {{'g' declared here}}
 // expected-note@-2 1+ {{candidate function not viable: call to __global__ function from __device__ function}}
 // expected-note@-3 0+ {{candidate function not viable: call to __global__ function from __host__ __device__ function}}
 // expected-note@-4 1+ {{candidate function not viable: call to __global__ function from __global__ function}}
 
 extern "C" __device__ DeviceReturnTy cd() { return DeviceReturnTy(); }
-// expected-note@-1 1+ {{'cd' declared here}}
+// host-note@-1 1+ {{'cd' declared here}}
 // expected-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}}
 // expected-note@-3 0+ {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
 
 extern "C" __host__ HostReturnTy ch() { return HostReturnTy(); }
-// expected-note@-1 1+ {{'ch' declared here}}
+// dev-note@-1 1+ {{'ch' declared here}}
 // expected-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}}
 // expected-note@-3 0+ {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
 // expected-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}}
 
 __host__ void hostf() {
-  DeviceFnPtr fp_d = d;         // expected-error {{reference to __device__ function 'd' in __host__ function}}
+  DeviceFnPtr fp_d = d;         // host-error {{reference to __device__ function 'd' in __host__ function}}
   DeviceReturnTy ret_d = d();   // expected-error {{no matching function for call to 'd'}}
-  DeviceFnPtr fp_cd = cd;       // expected-error {{reference to __device__ function 'cd' in __host__ function}}
+  DeviceFnPtr fp_cd = cd;       // host-error {{reference to __device__ function 'cd' in __host__ function}}
   DeviceReturnTy ret_cd = cd(); // expected-error {{no matching function for call to 'cd'}}
 
   HostFnPtr fp_h = h;
@@ -129,9 +129,9 @@ __device__ void devicef() {
   DeviceFnPtr fp_cd = cd;
   DeviceReturnTy ret_cd = cd();
 
-  HostFnPtr fp_h = h;         // expected-error {{reference to __host__ function 'h' in __device__ function}}
+  HostFnPtr fp_h = h;         // dev-error {{reference to __host__ function 'h' in __device__ function}}
   HostReturnTy ret_h = h();   // expected-error {{no matching function for call to 'h'}}
-  HostFnPtr fp_ch = ch;       // expected-error {{reference to __host__ function 'ch' in __device__ function}}
+  HostFnPtr fp_ch = ch;       // dev-error {{reference to __host__ function 'ch' in __device__ function}}
   HostReturnTy ret_ch = ch(); // expected-error {{no matching function for call to 'ch'}}
 
   DeviceFnPtr fp_dh = dh;
@@ -139,9 +139,9 @@ __device__ void devicef() {
   DeviceFnPtr fp_cdh = cdh;
   DeviceReturnTy ret_cdh = cdh();
 
-  GlobalFnPtr fp_g = g; // expected-error {{reference to __global__ function 'g' in __device__ function}}
+  GlobalFnPtr fp_g = g; // dev-error {{reference to __global__ function 'g' in __device__ function}}
   g(); // expected-error {{no matching function for call to 'g'}}
-  g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __device__ function}}
+  g<<<0,0>>>(); // dev-error {{reference to __global__ function 'g' in __device__ function}}
 }
 
 __global__ void globalf() {
@@ -150,9 +150,9 @@ __global__ void globalf() {
   DeviceFnPtr fp_cd = cd;
   DeviceReturnTy ret_cd = cd();
 
-  HostFnPtr fp_h = h;         // expected-error {{reference to __host__ function 'h' in __global__ function}}
+  HostFnPtr fp_h = h;         // dev-error {{reference to __host__ function 'h' in __global__ function}}
   HostReturnTy ret_h = h();   // expected-error {{no matching function for call to 'h'}}
-  HostFnPtr fp_ch = ch;       // expected-error {{reference to __host__ function 'ch' in __global__ function}}
+  HostFnPtr fp_ch = ch;       // dev-error {{reference to __host__ function 'ch' in __global__ function}}
   HostReturnTy ret_ch = ch(); // expected-error {{no matching function for call to 'ch'}}
 
   DeviceFnPtr fp_dh = dh;
@@ -160,9 +160,9 @@ __global__ void globalf() {
   DeviceFnPtr fp_cdh = cdh;
   DeviceReturnTy ret_cdh = cdh();
 
-  GlobalFnPtr fp_g = g; // expected-error {{reference to __global__ function 'g' in __global__ function}}
+  GlobalFnPtr fp_g = g; // dev-error {{reference to __global__ function 'g' in __global__ function}}
   g(); // expected-error {{no matching function for call to 'g'}}
-  g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __global__ function}}
+  g<<<0,0>>>(); // dev-error {{reference to __global__ function 'g' in __global__ function}}
 }
 
 __host__ __device__ void hostdevicef() {
diff --git a/clang/test/SemaCUDA/function-target.cu b/clang/test/SemaCUDA/function-target.cu
index 83dce50b4af83..48f7229df21f2 100644
--- a/clang/test/SemaCUDA/function-target.cu
+++ b/clang/test/SemaCUDA/function-target.cu
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify=dev,expected %s
 
 #include "Inputs/cuda.h"
 
@@ -23,11 +23,11 @@ __host__ void h1(void) {
 __host__ void d1h(void); // expected-note {{candidate function not viable: call to __host__ function from __device__ function}}
 __device__ void d1d(void);
 __host__ __device__ void d1hd(void);
-__global__ void d1g(void); // expected-note {{'d1g' declared here}}
+__global__ void d1g(void); // dev-note {{'d1g' declared here}}
 
 __device__ void d1(void) {
   d1h(); // expected-error {{no matching function}}
   d1d();
   d1hd();
-  d1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'd1g' in __device__ function}}
+  d1g<<<1, 1>>>(); // dev-error {{reference to __global__ function 'd1g' in __device__ function}}
 }
diff --git a/clang/test/SemaCUDA/implicit-device-lambda.cu b/clang/test/SemaCUDA/implicit-device-lambda.cu
index 8e5b7ddddb8f6..d2e59b8033c31 100644
--- a/clang/test/SemaCUDA/implicit-device-lambda.cu
+++ b/clang/test/SemaCUDA/implicit-device-lambda.cu
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -verify -fsyntax-only -verify-ignore-unexpected=warning -verify-ignore-unexpected=note %s
-// RUN: %clang_cc1 -std=c++11 -verify -fsyntax-only -verify-ignore-unexpected=warning -verify-ignore-unexpected=note %s
+// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -verify=dev,expected -fsyntax-only \
+// RUN:   -verify-ignore-unexpected=warning -verify-ignore-unexpected=note %s
+// RUN: %clang_cc1 -std=c++11 -verify -fsyntax-only \
+// RUN:   -verify-ignore-unexpected=warning -verify-ignore-unexpected=note %s
 
 #include "Inputs/cuda.h"
 
@@ -102,5 +104,5 @@ __device__ void foo() {
     void foo() {}
   };
   X x;
-  x.foo(); // expected-error {{reference to __host__ function 'foo' in __device__ function}}
+  x.foo(); // dev-error {{reference to __host__ function 'foo' in __device__ function}}
 }
diff --git a/clang/test/SemaCUDA/method-target.cu b/clang/test/SemaCUDA/method-target.cu
index 8e17daa0c1233..85c27ce436322 100644
--- a/clang/test/SemaCUDA/method-target.cu
+++ b/clang/test/SemaCUDA/method-target.cu
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify=host,expected %s
+// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify=dev,expected %s
 
 #include "Inputs/cuda.h"
 
@@ -6,11 +7,11 @@
 // Test 1: host method called from device function
 
 struct S1 {
-  void method() {} // expected-note {{'method' declared here}}
+  void method() {} // dev-note {{'method' declared here}}
 };
 
 __device__ void foo1(S1& s) {
-  s.method(); // expected-error {{reference to __host__ function 'method' in __device__ function}}
+  s.method(); // dev-error {{reference to __host__ function 'method' in __device__ function}}
 }
 
 //------------------------------------------------------------------------------
@@ -29,22 +30,22 @@ __device__ void foo2(S2& s, int i, float f) {
 // Test 3: device method called from host function
 
 struct S3 {
-  __device__ void method() {} // expected-note {{'method' declared here}}
+  __device__ void method() {} // host-note {{'method' declared here}}
 };
 
 void foo3(S3& s) {
-  s.method(); // expected-error {{reference to __device__ function 'method' in __host__ function}}
+  s.method(); // host-error {{reference to __device__ function 'method' in __host__ function}}
 }
 
 //------------------------------------------------------------------------------
 // Test 4: device method called from host&device function
 
 struct S4 {
-  __device__ void method() {}  // expected-note {{'method' declared here}}
+  __device__ void method() {}  // host-note {{'method' declared here}}
 };
 
 __host__ __device__ void foo4(S4& s) {
-  s.method(); // expected-error {{reference to __device__ function 'method' in __host__ __device__ function}}
+  s.method(); // host-error {{reference to __device__ function 'method' in __host__ __device__ function}}
 }
 
 //------------------------------------------------------------------------------
@@ -63,9 +64,9 @@ __device__ void foo5(S5& s, S5& t) {
 // Test 6: call method through pointer
 
 struct S6 {
-  void method() {} // expected-note {{'method' declared here}};
+  void method() {} // dev-note {{'method' declared here}};
 };
 
 __device__ void foo6(S6* s) {
-  s->method(); // expected-error {{reference to __host__ function 'method' in __device__ function}}
+  s->method(); // dev-error {{reference to __host__ function 'method' in __device__ function}}
 }
diff --git a/clang/test/SemaCUDA/reference-to-kernel-fn.cu b/clang/test/SemaCUDA/reference-to-kernel-fn.cu
index e502d134b0869..70a1cda6ab0c8 100644
--- a/clang/test/SemaCUDA/reference-to-kernel-fn.cu
+++ b/clang/test/SemaCUDA/reference-to-kernel-fn.cu
@@ -1,12 +1,14 @@
-// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify \
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=host \
+// RUN:   -verify-ignore-unexpected=note %s
+// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -fsyntax-only -verify=dev \
 // RUN:   -verify-ignore-unexpected=note %s
-// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -fsyntax-only -verify \
-// RUN:   -verify-ignore-unexpected=note -DDEVICE %s
 
 // Check that we can reference (get a function pointer to) a __global__
 // function from the host side, but not the device side.  (We don't yet support
 // device-side kernel launches.)
 
+// host-no-diagnostics
+
 #include "Inputs/cuda.h"
 
 struct Dummy {};
@@ -17,13 +19,11 @@ typedef void (*fn_ptr_t)();
 
 __host__ __device__ fn_ptr_t get_ptr_hd() {
   return kernel;
-#ifdef DEVICE
-  // expected-error@-2 {{reference to __global__ function}}
-#endif
+  // dev-error@-1 {{reference to __global__ function}}
 }
 __host__ fn_ptr_t get_ptr_h() {
   return kernel;
 }
 __device__ fn_ptr_t get_ptr_d() {
-  return kernel;  // expected-error {{reference to __global__ function}}
+  return kernel;  // dev-error {{reference to __global__ function}}
 }
diff --git a/clang/test/SemaCXX/attr-unused.cpp b/clang/test/SemaCXX/attr-unused.cpp
index b74bc915ce070..e3878152eca97 100644
--- a/clang/test/SemaCXX/attr-unused.cpp
+++ b/clang/test/SemaCXX/attr-unused.cpp
@@ -3,7 +3,17 @@
 namespace ns_unused { typedef int Int_unused __attribute__((unused)); }
 namespace ns_not_unused { typedef int Int_not_unused; }
 
+template  class C;
+template <> class __attribute__((unused)) C {};
+
 void f() {
   ns_not_unused::Int_not_unused i1; // expected-warning {{unused variable}}
   ns_unused::Int_unused i0; // expected-warning {{'Int_unused' was marked unused but was used}}
+
+  union __attribute__((unused)) { // expected-warning {{'' was marked unused but was used}}
+    int i;
+  };
+  (void) i;
+
+  C(); // expected-warning {{'C' was marked unused but was used}}
 }
diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp
index 78e9fef96c8da..eac0256c4fb21 100644
--- a/clang/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp
@@ -2167,6 +2167,11 @@ namespace PR21786 {
 namespace PR21859 {
   constexpr int Fun() { return; } // expected-error {{non-void constexpr function 'Fun' should return a value}}
   constexpr int Var = Fun();
+
+  template  constexpr int FunT1() { return; } // expected-error {{non-void constexpr function 'FunT1' should return a value}}
+  template  constexpr int FunT2() { return 0; }
+  template <> constexpr int FunT2() { return 0; }
+  template <> constexpr int FunT2() { return; } // expected-error {{non-void constexpr function 'FunT2' should return a value}}
 }
 
 struct InvalidRedef {
@@ -2322,3 +2327,22 @@ namespace array_size {
     f3(a);
   }
 }
+
+namespace flexible_array {
+  struct A { int x; char arr[]; }; // expected-warning {{C99}} expected-note {{here}}
+  constexpr A a = {1};
+  static_assert(a.x == 1, "");
+  static_assert(&a.arr != nullptr, "");
+  static_assert(a.arr[0], ""); // expected-error {{constant expression}} expected-note {{array member without known bound}}
+  static_assert(a.arr[1], ""); // expected-error {{constant expression}} expected-note {{array member without known bound}}
+
+  constexpr A b[] = {{1}, {2}, {3}}; // expected-warning {{flexible array member}}
+  static_assert(b[0].x == 1, "");
+  static_assert(b[1].x == 2, "");
+  static_assert(b[2].x == 3, "");
+  static_assert(b[2].arr[0], ""); // expected-error {{constant expression}} expected-note {{array member without known bound}}
+
+  // If we ever start to accept this, we'll need to ensure we can
+  // constant-evaluate it properly.
+  constexpr A c = {1, 2, 3}; // expected-error {{initialization of flexible array member}}
+}
diff --git a/clang/test/SemaCXX/consteval-return-void.cpp b/clang/test/SemaCXX/consteval-return-void.cpp
new file mode 100644
index 0000000000000..39e1418306f50
--- /dev/null
+++ b/clang/test/SemaCXX/consteval-return-void.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s
+
+consteval int Fun() { return; } // expected-error {{non-void consteval function 'Fun' should return a value}}
+
+template  consteval int FunT1() { return; } // expected-error {{non-void consteval function 'FunT1' should return a value}}
+template  consteval int FunT2() { return 0; }
+template <> consteval int FunT2() { return 0; }
+template <> consteval int FunT2() { return; } // expected-error {{non-void consteval function 'FunT2' should return a value}}
+
+enum E {};
+
+constexpr E operator+(E,E) { return; }	// expected-error {{non-void constexpr function 'operator+' should return a value}}
+consteval E operator+(E,E) { return; }  // expected-error {{non-void consteval function 'operator+' should return a value}}
+template  constexpr E operator-(E,E) { return; } // expected-error {{non-void constexpr function 'operator-' should return a value}}
+template  consteval E operator-(E,E) { return; } // expected-error {{non-void consteval function 'operator-' should return a value}}
+
+template  constexpr E operator*(E,E);
+template  consteval E operator/(E,E);
+template <> constexpr E operator*(E,E) { return; } // expected-error {{non-void constexpr function 'operator*' should return a value}}
+template <> consteval E operator/(E,E) { return; } // expected-error {{non-void consteval function 'operator/' should return a value}}
diff --git a/clang/test/SemaCXX/default2.cpp b/clang/test/SemaCXX/default2.cpp
index 4c8e8ce6941a4..7651233f8636f 100644
--- a/clang/test/SemaCXX/default2.cpp
+++ b/clang/test/SemaCXX/default2.cpp
@@ -117,6 +117,12 @@ class C2 {
   static int f(int = 10); // expected-note{{default argument declared here}}
 };
 
+template  class C3;
+template <> class C3 {
+  static void g(int = f()); // expected-error {{use of default argument to function 'f' that is declared later in class 'C3'}}
+  static int f(int = 10); // expected-note {{default argument declared here}}
+};
+
 // Make sure we actually parse the default argument for an inline definition
 class XX {
   void A(int length = -1 ) {  } 
diff --git a/clang/test/SemaCXX/incomplete-call.cpp b/clang/test/SemaCXX/incomplete-call.cpp
index 0fb1ef5f07a50..46f470e4a8810 100644
--- a/clang/test/SemaCXX/incomplete-call.cpp
+++ b/clang/test/SemaCXX/incomplete-call.cpp
@@ -1,7 +1,8 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-struct A; // expected-note 14 {{forward declaration of 'A'}}
+struct A; // expected-note 15 {{forward declaration of 'A'}}
 
 A f(); // expected-note {{'f' declared here}}
+template  A ft(T); // expected-note {{'ft' declared here}}
 
 struct B {
   A f(); // expected-note {{'f' declared here}}
@@ -38,7 +39,8 @@ void g() {
   
   A (B::*mfp)() = 0;
   (b.*mfp)(); // expected-error {{calling function with incomplete return type 'A'}}
-  
+
+  ft(42); // expected-error {{calling 'ft' with incomplete return type 'A'}}
 }
 
 
diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp
index 3240d5351fc5b..7f7f9c5704872 100644
--- a/clang/test/SemaCXX/lambda-expressions.cpp
+++ b/clang/test/SemaCXX/lambda-expressions.cpp
@@ -649,3 +649,14 @@ void Run(const int& points) {
 void operator_parens() {
   [&](int x){ operator()(); }(0); // expected-error {{undeclared 'operator()'}}
 }
+
+namespace captured_name {
+void Test() {
+  union {           // expected-note {{'' declared here}}
+    int i;
+  };
+  [] { return i; }; // expected-error {{variable '' cannot be implicitly captured in a lambda with no capture-default specified}}
+                    // expected-note@-1 {{lambda expression begins here}}
+
+}
+};
diff --git a/clang/test/SemaCXX/return-void.cpp b/clang/test/SemaCXX/return-void.cpp
new file mode 100644
index 0000000000000..b3aa203133dc3
--- /dev/null
+++ b/clang/test/SemaCXX/return-void.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -std=c++11 -fsyntax-only -verify
+
+void f1() { return {1,2}; } // expected-error {{void function 'f1' must not return a value}}
+
+template  void f2() { return {1,2}; } // expected-error {{void function 'f2' must not return a value}}
+
+template <> void f2() { return {1,2}; } // expected-error {{void function 'f2' must not return a value}}
+
+void test_f2() {
+  f2();
+  f2();
+}
+
+struct S {
+  void f3() { return {1,2}; } // expected-error {{void function 'f3' must not return a value}}
+  S() { return {1,2}; } // expected-error {{constructor 'S' must not return a value}}
+  ~S() { return {1,2}; } // expected-error {{destructor '~S' must not return a value}}
+};
+
+template  struct ST {
+  void f4() { return {1,2}; } // expected-error {{void function 'f4' must not return a value}}
+  ST() { return {1,2}; } // expected-error {{constructor 'ST' must not return a value}}
+  ~ST() { return {1,2}; } // expected-error {{destructor '~ST' must not return a value}}
+};
+
+ST st;
diff --git a/clang/test/SemaCXX/return.cpp b/clang/test/SemaCXX/return.cpp
index db289240d1ce6..1550d009b0617 100644
--- a/clang/test/SemaCXX/return.cpp
+++ b/clang/test/SemaCXX/return.cpp
@@ -108,9 +108,19 @@ namespace return_has_expr {
 namespace ctor_returns_void {
   void f() {}
   struct S { 
-    S() { return f(); }; // expected-error {{constructor 'S' must not return void expression}}
+    S() { return f(); } // expected-error {{constructor 'S' must not return void expression}}
     ~S() { return f(); } // expected-error {{destructor '~S' must not return void expression}}
   };
+
+  template  struct ST {
+    ST() { return f(); } // expected-error {{constructor 'ST' must not return void expression}}
+                         // expected-error@-1 {{constructor 'ST' must not return void expression}}
+    ~ST() { return f(); } // expected-error {{destructor '~ST' must not return void expression}}
+                          // expected-error@-1 {{destructor '~ST' must not return void expression}}
+  };
+
+  ST st; // expected-note {{in instantiation of member function 'ctor_returns_void::ST::ST'}}
+              // expected-note@-1 {{in instantiation of member function 'ctor_returns_void::ST::~ST'}}
 }
 
 void cxx_unresolved_expr() {
diff --git a/clang/test/SemaCXX/typo-correction.cpp b/clang/test/SemaCXX/typo-correction.cpp
index 92a145074e728..e0325b3ba09bf 100644
--- a/clang/test/SemaCXX/typo-correction.cpp
+++ b/clang/test/SemaCXX/typo-correction.cpp
@@ -611,6 +611,41 @@ int bar() {
 }
 }
 
+namespace testIncludeTypeInTemplateArgument {
+template 
+void foo(T t = {}, U = {}); // expected-note {{candidate template ignored}}
+
+class AddObservation {}; // expected-note {{declared here}}
+int bar1() {
+  // should resolve to a class.
+  foo(); // expected-error {{unknown type name 'AddObservationFn'; did you mean 'AddObservation'?}}
+
+  // should not resolve to a class.
+  foo(AddObservationFn, 1);    // expected-error-re {{use of undeclared identifier 'AddObservationFn'{{$}}}}
+  int a = AddObservationFn, b; // expected-error-re {{use of undeclared identifier 'AddObservationFn'{{$}}}}
+
+  int AddObservation; // expected-note 3{{declared here}}
+  // should resolve to a local variable.
+  foo(AddObservationFn, 1);    // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}}
+  int c = AddObservationFn, d; // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}}
+
+  // FIXME: would be nice to not resolve to a variable.
+  foo(); // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}} \
+                                   expected-error {{no matching function for call}}
+}
+} // namespace testIncludeTypeInTemplateArgument
+
+namespace testNoCrashOnNullNNSTypoCorrection {
+int AddObservation();
+template 
+class UsingImpl {};
+class AddObservation { // expected-note {{declared here}}
+  using Using =
+      // should resolve to a class.
+      UsingImpl; // expected-error {{unknown type name 'AddObservationFn'; did you mean}}
+};
+} // namespace testNoCrashOnNullNNSTypoCorrection
+
 namespace testNonStaticMemberHandling {
 struct Foo {
   bool usesMetadata;  // expected-note {{'usesMetadata' declared here}}
diff --git a/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp b/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp
index 3312b5635f062..789935e3470ac 100644
--- a/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp
+++ b/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp
@@ -20,3 +20,35 @@ struct C {
         C::f();
     }
 };
+
+template  struct TA {
+  TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'TA'}}
+  ~TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the destructor of 'TA'}}
+
+  virtual void f() = 0; // expected-note 2{{'f' declared here}}
+};
+
+template <> struct TA {
+  TA() { f(); }
+  ~TA() { f(); }
+  void f();
+};
+
+template <> struct TA {
+  TA() { f(); }  // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'TA'}}
+  ~TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the destructor of 'TA'}}
+  virtual void f() = 0; // expected-note 2{{'f' declared here}}
+};
+
+struct TB : TA { // expected-note {{in instantiation of member function 'TA::TA' requested here}}
+  void f() override;    // expected-note@-1 {{in instantiation of member function 'TA::~TA' requested here}}
+};
+TB tb;
+
+struct TC : TA {}; // ok
+TC tc; // ok
+
+struct TD : TA {
+  void f() override;
+};
+TD td;
diff --git a/clang/test/SemaCXX/warn-pure-virtual-kext.cpp b/clang/test/SemaCXX/warn-pure-virtual-kext.cpp
index e681a02cc9166..8431e202ad714 100644
--- a/clang/test/SemaCXX/warn-pure-virtual-kext.cpp
+++ b/clang/test/SemaCXX/warn-pure-virtual-kext.cpp
@@ -6,3 +6,15 @@ struct A {
         A::f(); // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'A'}} // expected-note {{qualified call to 'A'::'f' is treated as a virtual call to 'f' due to -fapple-kext}}
     }
 };
+
+template  struct TA {
+  virtual void f() = 0; // expected-note {{'f' declared here}}
+
+  TA() { TA::f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'TA'}} // expected-note {{qualified call to 'TA'::'f' is treated as a virtual call to 'f' due to -fapple-kext}}
+};
+
+struct B : TA { // expected-note {{in instantiation of member function 'TA::TA' requested here}}
+  void f() override;
+};
+
+B b;
diff --git a/clang/test/SemaCXX/warn-suggest-destructor-override b/clang/test/SemaCXX/warn-suggest-destructor-override
new file mode 100644
index 0000000000000..1cfff748678f2
--- /dev/null
+++ b/clang/test/SemaCXX/warn-suggest-destructor-override
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -verify -Wsuggest-destructor-override
+
+struct A {
+  ~A();
+  virtual void run();
+};
+
+struct B : public A {
+  ~B();
+};
+
+struct C {
+  virtual void run();
+  virtual ~C();  // expected-note 2{{overridden virtual function is here}}
+};
+
+struct D : public C {
+  void run();
+  ~D();
+  // expected-warning@-1 {{'~D' overrides a destructor but is not marked 'override'}}
+};
+
+struct E : public C {
+  void run();
+  virtual ~E();
+  // expected-warning@-1 {{'~E' overrides a destructor but is not marked 'override'}}
+};
diff --git a/clang/test/SemaCXX/warn-suggest-override b/clang/test/SemaCXX/warn-suggest-override
new file mode 100644
index 0000000000000..e06c939ff001f
--- /dev/null
+++ b/clang/test/SemaCXX/warn-suggest-override
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -verify -Wsuggest-override
+
+struct A {
+  ~A();
+  void run();
+};
+
+struct B : public A {
+  ~B();
+  void run();
+};
+
+struct C {
+  virtual void run(); // expected-note 2{{overridden virtual function is here}}
+  virtual ~C();
+};
+
+struct D : public C {
+  void run();
+  // expected-warning@-1 {{'run()' overrides a member function but is not marked 'override'}}
+  ~D();
+};
+
+struct E : public C {
+  virtual void run();
+  // expected-warning@-1 {{'run()' overrides a member function but is not marked 'override'}}
+  virtual ~E();
+};
+
+struct F : public C {
+  void run() override;
+  ~F() override;
+};
+
+struct G : public C {
+  void run() final;
+  ~G() final;
+};
diff --git a/clang/test/SemaObjC/method-return-void.m b/clang/test/SemaObjC/method-return-void.m
new file mode 100644
index 0000000000000..850c81bad1fca
--- /dev/null
+++ b/clang/test/SemaObjC/method-return-void.m
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -Wmethod-signatures -fsyntax-only -verify -Wno-objc-root-class %s
+
+@interface Test
+- (int)foo;
+@end
+
+@implementation Test
+- (int)foo { return; } // expected-error {{non-void method 'foo' should return a value}}
+@end
diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index 6a81a8b2d7c76..ddfdb6da4347c 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -5,6 +5,11 @@
 //CHECK: |-VarDecl {{.*}} foo 'const __global int'
 constexpr int foo = 0;
 
+//CHECK: |-VarDecl {{.*}} foo1 'T' cinit
+//CHECK: `-VarTemplateSpecializationDecl {{.*}} used foo1 '__global long':'__global long' cinit
+template 
+T foo1 = 0;
+
 class c {
 public:
   //CHECK: `-VarDecl {{.*}} foo2 'const __global int'
@@ -30,7 +35,7 @@ struct c2 {
 
 template 
 struct x1 {
-//CHECK: -CXXMethodDecl {{.*}} operator= 'x1 &(const x1 &__private){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXMethodDecl {{.*}} operator= 'x1 &(const x1 &){{( __attribute__.*)?}} __generic'
 //CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1 &(const __generic x1 &__private){{( __attribute__.*)?}} __generic'
   x1& operator=(const x1& xx) {
     y = xx.y;
@@ -41,7 +46,7 @@ struct x1 {
 
 template 
 struct x2 {
-//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1 *__private){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1 *){{( __attribute__.*)?}} __generic'
 //CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1 *__private){{( __attribute__.*)?}} __generic'
   void foo(x1* xx) {
     m[0] = *xx;
@@ -57,10 +62,10 @@ void bar(__global x1 *xx, __global x2 *bar) {
 template 
 class x3 : public T {
 public:
-  //CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &__private){{( __attribute__.*)?}} __generic'
+  //CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &){{( __attribute__.*)?}} __generic'
   x3(const x3 &t);
 };
-//CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &__private){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &){{( __attribute__.*)?}} __generic'
 template 
 x3::x3(const x3 &t) {}
 
@@ -68,7 +73,8 @@ template 
 T xxx(T *in1, T in2) {
   // This pointer can't be deduced to generic because addr space
   // will be taken from the template argument.
-  //CHECK: `-VarDecl {{.*}} '__private T *__private' cinit
+  //CHECK: `-VarDecl {{.*}} 'T *' cinit
+  //CHECK: `-VarDecl {{.*}} i '__private int *__private' cinit
   T *i = in1;
   T ii;
   __private T *ptr = ⅈ
@@ -111,4 +117,5 @@ __kernel void k() {
   t3(&x);
   t4(&p);
   t5(&p);
+  long f1 = foo1;
 }
diff --git a/clang/test/SemaTemplate/deduction.cpp b/clang/test/SemaTemplate/deduction.cpp
index 5218543ab8a41..a068bcaea0483 100644
--- a/clang/test/SemaTemplate/deduction.cpp
+++ b/clang/test/SemaTemplate/deduction.cpp
@@ -581,3 +581,19 @@ namespace PR44890 {
     return w.get<0>();
   }
 }
+
+namespace merge_size_only_deductions {
+#if __cplusplus >= 201703L
+  // Based on a testcase by Hubert Tong.
+  template struct X {};
+  template struct Y {};
+  template struct id { using Type = T; };
+
+  template
+    int f(X, Y, X);
+
+  using size_t = __SIZE_TYPE__;
+  int a = f(X(), Y<(size_t)1, (size_t)2>(), X, id>());
+  int b = f(X(), Y<1, 2>(), X, id>());
+#endif
+}
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
index ade32988b9a82..dacda6894a045 100644
--- a/clang/test/lit.cfg.py
+++ b/clang/test/lit.cfg.py
@@ -46,6 +46,8 @@
 config.substitutions.append(
     ('%src_include_dir', config.clang_src_dir + '/include'))
 
+config.substitutions.append(
+    ('%target_triple', config.target_triple))
 
 # Propagate path to symbolizer for ASan/MSan.
 llvm_config.with_system_environment(
diff --git a/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt b/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt
index 47f9fdf68f409..9ceb1d3318283 100644
--- a/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt
+++ b/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt
@@ -16,15 +16,9 @@ set(LLVM_LINK_COMPONENTS
   native
 )
 
-# Depend on LLVM IR intrinsic generation.
-set(handle_llvm_deps intrinsics_gen)
-if (CLANG_BUILT_STANDALONE)
-  set(handle_llvm_deps)
-endif()
-
 add_clang_library(clangHandleLLVM
   handle_llvm.cpp
 
   DEPENDS
-  ${handle_llvm_deps}
+  intrinsics_gen
   )
diff --git a/clang/tools/clang-import-test/CMakeLists.txt b/clang/tools/clang-import-test/CMakeLists.txt
index 4ccc2d752aac4..e459de8f635f5 100644
--- a/clang/tools/clang-import-test/CMakeLists.txt
+++ b/clang/tools/clang-import-test/CMakeLists.txt
@@ -3,14 +3,10 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
-if(NOT CLANG_BUILT_STANDALONE)
-  set(tablegen_deps intrinsics_gen)
-endif()
-
 add_clang_executable(clang-import-test
   clang-import-test.cpp
   DEPENDS
-  ${tablegen_deps}
+  intrinsics_gen
   )
 
 set(CLANG_IMPORT_TEST_LIB_DEPS
diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt
index d1bd248e3ef45..c27d809701f91 100644
--- a/clang/tools/clang-offload-bundler/CMakeLists.txt
+++ b/clang/tools/clang-offload-bundler/CMakeLists.txt
@@ -1,14 +1,10 @@
 set(LLVM_LINK_COMPONENTS Object Support)
 
-if(NOT CLANG_BUILT_STANDALONE)
-  set(tablegen_deps intrinsics_gen)
-endif()
-
 add_clang_tool(clang-offload-bundler
   ClangOffloadBundler.cpp
   
   DEPENDS
-  ${tablegen_deps}
+  intrinsics_gen
   )
 
 set(CLANG_OFFLOAD_BUNDLER_LIB_DEPS
diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt
index 6f8940f88eabd..8bcb46267a37c 100644
--- a/clang/tools/clang-offload-wrapper/CMakeLists.txt
+++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt
@@ -1,14 +1,10 @@
 set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils)
 
-if(NOT CLANG_BUILT_STANDALONE)
-  set(tablegen_deps intrinsics_gen)
-endif()
-
 add_clang_tool(clang-offload-wrapper
   ClangOffloadWrapper.cpp
 
   DEPENDS
-  ${tablegen_deps}
+  intrinsics_gen
   )
 
 set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS
diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt
index 8d4a6a597a208..94d84423217f5 100644
--- a/clang/tools/driver/CMakeLists.txt
+++ b/clang/tools/driver/CMakeLists.txt
@@ -25,10 +25,6 @@ if(CLANG_PLUGIN_SUPPORT)
   set(support_plugins SUPPORT_PLUGINS)
 endif()
 
-if(NOT CLANG_BUILT_STANDALONE)
-  set(tablegen_deps intrinsics_gen)
-endif()
-
 add_clang_tool(clang
   driver.cpp
   cc1_main.cpp
@@ -36,7 +32,7 @@ add_clang_tool(clang
   cc1gen_reproducer_main.cpp
 
   DEPENDS
-  ${tablegen_deps}
+  intrinsics_gen
   ${support_plugins}
   )
 
diff --git a/clang/tools/scan-build/bin/scan-build b/clang/tools/scan-build/bin/scan-build
index 11334a0b96269..aed8c417b6ccd 100755
--- a/clang/tools/scan-build/bin/scan-build
+++ b/clang/tools/scan-build/bin/scan-build
@@ -1973,11 +1973,13 @@ my $CCC_ANALYZER_ANALYSIS = join ' ', @AnalysesToRun;
 my $CCC_ANALYZER_PLUGINS = join ' ', map { "-load ".$_ } @{$Options{PluginsToLoad}};
 my $CCC_ANALYZER_CONFIG = join ' ', map { "-analyzer-config ".$_ } @{$Options{ConfigOptions}};
 
-foreach (sort { $Options{SilenceCheckers}{$a} <=> $Options{SilenceCheckers}{$b} }
-         keys %{$Options{SilenceCheckers}}) {
-  # Add checkers in order they were silenced.
+if (%{$Options{SilenceCheckers}}) {
   $CCC_ANALYZER_CONFIG =
-      $CCC_ANALYZER_CONFIG." -analyzer-config silence-checkers=".$_;
+      $CCC_ANALYZER_CONFIG." -analyzer-config silence-checkers="
+                          .join(';', sort {
+                                            $Options{SilenceCheckers}{$a} <=>
+                                            $Options{SilenceCheckers}{$b}
+                                          } keys %{$Options{SilenceCheckers}});
 }
 
 my %EnvVars = (
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index aeb4fd098d224..36e92c632c03a 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -18,7 +18,7 @@
 namespace clang {
 namespace ast_matchers {
 
-TEST(IsExpandedFromMacro, ShouldMatchInFile) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesInFile) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
     void Test() { MY_MACRO(4); }
@@ -26,7 +26,7 @@ TEST(IsExpandedFromMacro, ShouldMatchInFile) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("MY_MACRO"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchNested) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesNested) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
 #define WRAPPER(a) MY_MACRO(a)
@@ -35,7 +35,7 @@ TEST(IsExpandedFromMacro, ShouldMatchNested) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("MY_MACRO"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchIntermediate) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesIntermediate) {
   StringRef input = R"cc(
 #define IMPL(a) (4 + (a))
 #define MY_MACRO(a) IMPL(a)
@@ -45,7 +45,7 @@ TEST(IsExpandedFromMacro, ShouldMatchIntermediate) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("MY_MACRO"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchTransitive) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesTransitive) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
 #define WRAPPER(a) MY_MACRO(a)
@@ -54,7 +54,7 @@ TEST(IsExpandedFromMacro, ShouldMatchTransitive) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("WRAPPER"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchArgument) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesArgument) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
     void Test() {
@@ -65,9 +65,9 @@ TEST(IsExpandedFromMacro, ShouldMatchArgument) {
   EXPECT_TRUE(matches(input, declRefExpr(isExpandedFromMacro("MY_MACRO"))));
 }
 
-// Like IsExpandedFromMacroShouldMatchArgumentMacro, but the argument is itself
-// a macro.
-TEST(IsExpandedFromMacro, ShouldMatchArgumentMacroExpansion) {
+// Like IsExpandedFromMacro_MatchesArgument, but the argument is itself a
+// macro.
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesArgumentMacroExpansion) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
 #define IDENTITY(a) (a)
@@ -78,7 +78,7 @@ TEST(IsExpandedFromMacro, ShouldMatchArgumentMacroExpansion) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("IDENTITY"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchWhenInArgument) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesWhenInArgument) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
 #define IDENTITY(a) (a)
@@ -89,7 +89,7 @@ TEST(IsExpandedFromMacro, ShouldMatchWhenInArgument) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("MY_MACRO"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchObjectMacro) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_MatchesObjectMacro) {
   StringRef input = R"cc(
 #define PLUS (2 + 2)
     void Test() {
@@ -99,16 +99,16 @@ TEST(IsExpandedFromMacro, ShouldMatchObjectMacro) {
   EXPECT_TRUE(matches(input, binaryOperator(isExpandedFromMacro("PLUS"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldMatchFromCommandLine) {
+TEST(IsExpandedFromMacro, MatchesFromCommandLine) {
   StringRef input = R"cc(
     void Test() { FOUR_PLUS_FOUR; }
   )cc";
-  EXPECT_TRUE(matchesConditionally(input,
-                                   binaryOperator(isExpandedFromMacro("FOUR_PLUS_FOUR")),
-                                   true, {"-std=c++11", "-DFOUR_PLUS_FOUR=4+4"}));
+  EXPECT_TRUE(matchesConditionally(
+      input, binaryOperator(isExpandedFromMacro("FOUR_PLUS_FOUR")), true,
+      {"-std=c++11", "-DFOUR_PLUS_FOUR=4+4"}));
 }
 
-TEST(IsExpandedFromMacro, ShouldNotMatchBeginOnly) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_NotMatchesBeginOnly) {
   StringRef input = R"cc(
 #define ONE_PLUS 1+
   void Test() { ONE_PLUS 4; }
@@ -117,7 +117,7 @@ TEST(IsExpandedFromMacro, ShouldNotMatchBeginOnly) {
       notMatches(input, binaryOperator(isExpandedFromMacro("ONE_PLUS"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldNotMatchEndOnly) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_NotMatchesEndOnly) {
   StringRef input = R"cc(
 #define PLUS_ONE +1
   void Test() { 4 PLUS_ONE; }
@@ -126,7 +126,7 @@ TEST(IsExpandedFromMacro, ShouldNotMatchEndOnly) {
       notMatches(input, binaryOperator(isExpandedFromMacro("PLUS_ONE"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldNotMatchDifferentMacro) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_NotMatchesDifferentMacro) {
   StringRef input = R"cc(
 #define MY_MACRO(a) (4 + (a))
     void Test() { MY_MACRO(4); }
@@ -134,7 +134,7 @@ TEST(IsExpandedFromMacro, ShouldNotMatchDifferentMacro) {
   EXPECT_TRUE(notMatches(input, binaryOperator(isExpandedFromMacro("OTHER"))));
 }
 
-TEST(IsExpandedFromMacro, ShouldNotMatchDifferentInstances) {
+TEST_P(ASTMatchersTest, IsExpandedFromMacro_NotMatchesDifferentInstances) {
   StringRef input = R"cc(
 #define FOUR 4
     void Test() { FOUR + FOUR; }
@@ -142,166 +142,160 @@ TEST(IsExpandedFromMacro, ShouldNotMatchDifferentInstances) {
   EXPECT_TRUE(notMatches(input, binaryOperator(isExpandedFromMacro("FOUR"))));
 }
 
-TEST(AllOf, AllOverloadsWork) {
-  const char Program[] =
-      "struct T { };"
-      "int f(int, T*, int, int);"
-      "void g(int x) { T t; f(x, &t, 3, 4); }";
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl())))))));
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl()))),
-                     hasArgument(1, hasType(pointsTo(
-                                        recordDecl(hasName("T")))))))));
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl()))),
-                     hasArgument(1, hasType(pointsTo(
-                                        recordDecl(hasName("T"))))),
-                     hasArgument(2, integerLiteral(equals(3)))))));
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl()))),
-                     hasArgument(1, hasType(pointsTo(
-                                        recordDecl(hasName("T"))))),
-                     hasArgument(2, integerLiteral(equals(3))),
-                     hasArgument(3, integerLiteral(equals(4)))))));
-}
-
-TEST(DeclarationMatcher, MatchHas) {
+TEST_P(ASTMatchersTest, AllOf) {
+  const char Program[] = "struct T { };"
+                         "int f(int, struct T*, int, int);"
+                         "void g(int x) { struct T t; f(x, &t, 3, 4); }";
+  EXPECT_TRUE(matches(
+      Program, callExpr(allOf(callee(functionDecl(hasName("f"))),
+                              hasArgument(0, declRefExpr(to(varDecl())))))));
+  EXPECT_TRUE(matches(
+      Program,
+      callExpr(
+          allOf(callee(functionDecl(hasName("f"))),
+                hasArgument(0, declRefExpr(to(varDecl()))),
+                hasArgument(1, hasType(pointsTo(recordDecl(hasName("T")))))))));
+  EXPECT_TRUE(matches(
+      Program, callExpr(allOf(
+                   callee(functionDecl(hasName("f"))),
+                   hasArgument(0, declRefExpr(to(varDecl()))),
+                   hasArgument(1, hasType(pointsTo(recordDecl(hasName("T"))))),
+                   hasArgument(2, integerLiteral(equals(3)))))));
+  EXPECT_TRUE(matches(
+      Program, callExpr(allOf(
+                   callee(functionDecl(hasName("f"))),
+                   hasArgument(0, declRefExpr(to(varDecl()))),
+                   hasArgument(1, hasType(pointsTo(recordDecl(hasName("T"))))),
+                   hasArgument(2, integerLiteral(equals(3))),
+                   hasArgument(3, integerLiteral(equals(4)))))));
+}
+
+TEST_P(ASTMatchersTest, Has) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `has()` that does not depend on C++.
+    return;
+  }
+
   DeclarationMatcher HasClassX = recordDecl(has(recordDecl(hasName("X"))));
   EXPECT_TRUE(matches("class Y { class X {}; };", HasClassX));
   EXPECT_TRUE(matches("class X {};", HasClassX));
 
   DeclarationMatcher YHasClassX =
-    recordDecl(hasName("Y"), has(recordDecl(hasName("X"))));
+      recordDecl(hasName("Y"), has(recordDecl(hasName("X"))));
   EXPECT_TRUE(matches("class Y { class X {}; };", YHasClassX));
   EXPECT_TRUE(notMatches("class X {};", YHasClassX));
-  EXPECT_TRUE(
-    notMatches("class Y { class Z { class X {}; }; };", YHasClassX));
+  EXPECT_TRUE(notMatches("class Y { class Z { class X {}; }; };", YHasClassX));
 }
 
-TEST(DeclarationMatcher, MatchHasRecursiveAllOf) {
-  DeclarationMatcher Recursive =
-    recordDecl(
-      has(recordDecl(
-        has(recordDecl(hasName("X"))),
-        has(recordDecl(hasName("Y"))),
-        hasName("Z"))),
-      has(recordDecl(
-        has(recordDecl(hasName("A"))),
-        has(recordDecl(hasName("B"))),
-        hasName("C"))),
-      hasName("F"));
-
-  EXPECT_TRUE(matches(
-    "class F {"
-      "  class Z {"
-      "    class X {};"
-      "    class Y {};"
-      "  };"
-      "  class C {"
-      "    class A {};"
-      "    class B {};"
-      "  };"
-      "};", Recursive));
-
-  EXPECT_TRUE(matches(
-    "class F {"
-      "  class Z {"
-      "    class A {};"
-      "    class X {};"
-      "    class Y {};"
-      "  };"
-      "  class C {"
-      "    class X {};"
-      "    class A {};"
-      "    class B {};"
-      "  };"
-      "};", Recursive));
+TEST_P(ASTMatchersTest, Has_RecursiveAllOf) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
 
-  EXPECT_TRUE(matches(
-    "class O1 {"
-      "  class O2 {"
-      "    class F {"
-      "      class Z {"
-      "        class A {};"
-      "        class X {};"
-      "        class Y {};"
-      "      };"
-      "      class C {"
-      "        class X {};"
-      "        class A {};"
-      "        class B {};"
-      "      };"
-      "    };"
-      "  };"
-      "};", Recursive));
-}
-
-TEST(DeclarationMatcher, MatchHasRecursiveAnyOf) {
   DeclarationMatcher Recursive =
-    recordDecl(
-      anyOf(
-        has(recordDecl(
-          anyOf(
-            has(recordDecl(
-              hasName("X"))),
-            has(recordDecl(
-              hasName("Y"))),
-            hasName("Z")))),
-        has(recordDecl(
-          anyOf(
-            hasName("C"),
-            has(recordDecl(
-              hasName("A"))),
-            has(recordDecl(
-              hasName("B")))))),
-        hasName("F")));
+      recordDecl(has(recordDecl(has(recordDecl(hasName("X"))),
+                                has(recordDecl(hasName("Y"))), hasName("Z"))),
+                 has(recordDecl(has(recordDecl(hasName("A"))),
+                                has(recordDecl(hasName("B"))), hasName("C"))),
+                 hasName("F"));
+
+  EXPECT_TRUE(matches("class F {"
+                      "  class Z {"
+                      "    class X {};"
+                      "    class Y {};"
+                      "  };"
+                      "  class C {"
+                      "    class A {};"
+                      "    class B {};"
+                      "  };"
+                      "};",
+                      Recursive));
+
+  EXPECT_TRUE(matches("class F {"
+                      "  class Z {"
+                      "    class A {};"
+                      "    class X {};"
+                      "    class Y {};"
+                      "  };"
+                      "  class C {"
+                      "    class X {};"
+                      "    class A {};"
+                      "    class B {};"
+                      "  };"
+                      "};",
+                      Recursive));
+
+  EXPECT_TRUE(matches("class O1 {"
+                      "  class O2 {"
+                      "    class F {"
+                      "      class Z {"
+                      "        class A {};"
+                      "        class X {};"
+                      "        class Y {};"
+                      "      };"
+                      "      class C {"
+                      "        class X {};"
+                      "        class A {};"
+                      "        class B {};"
+                      "      };"
+                      "    };"
+                      "  };"
+                      "};",
+                      Recursive));
+}
+
+TEST_P(ASTMatchersTest, Has_RecursiveAnyOf) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  DeclarationMatcher Recursive = recordDecl(
+      anyOf(has(recordDecl(anyOf(has(recordDecl(hasName("X"))),
+                                 has(recordDecl(hasName("Y"))), hasName("Z")))),
+            has(recordDecl(anyOf(hasName("C"), has(recordDecl(hasName("A"))),
+                                 has(recordDecl(hasName("B")))))),
+            hasName("F")));
 
   EXPECT_TRUE(matches("class F {};", Recursive));
   EXPECT_TRUE(matches("class Z {};", Recursive));
   EXPECT_TRUE(matches("class C {};", Recursive));
   EXPECT_TRUE(matches("class M { class N { class X {}; }; };", Recursive));
   EXPECT_TRUE(matches("class M { class N { class B {}; }; };", Recursive));
-  EXPECT_TRUE(
-    matches("class O1 { class O2 {"
-              "  class M { class N { class B {}; }; }; "
-              "}; };", Recursive));
+  EXPECT_TRUE(matches("class O1 { class O2 {"
+                      "  class M { class N { class B {}; }; }; "
+                      "}; };",
+                      Recursive));
 }
 
-TEST(DeclarationMatcher, MatchNot) {
+TEST_P(ASTMatchersTest, Unless) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `unless()` that does not depend on C++.
+    return;
+  }
+
   DeclarationMatcher NotClassX =
-    cxxRecordDecl(
-      isDerivedFrom("Y"),
-      unless(hasName("X")));
+      cxxRecordDecl(isDerivedFrom("Y"), unless(hasName("X")));
   EXPECT_TRUE(notMatches("", NotClassX));
   EXPECT_TRUE(notMatches("class Y {};", NotClassX));
   EXPECT_TRUE(matches("class Y {}; class Z : public Y {};", NotClassX));
   EXPECT_TRUE(notMatches("class Y {}; class X : public Y {};", NotClassX));
   EXPECT_TRUE(
-    notMatches("class Y {}; class Z {}; class X : public Y {};",
-               NotClassX));
+      notMatches("class Y {}; class Z {}; class X : public Y {};", NotClassX));
 
   DeclarationMatcher ClassXHasNotClassY =
-    recordDecl(
-      hasName("X"),
-      has(recordDecl(hasName("Z"))),
-      unless(
-        has(recordDecl(hasName("Y")))));
+      recordDecl(hasName("X"), has(recordDecl(hasName("Z"))),
+                 unless(has(recordDecl(hasName("Y")))));
   EXPECT_TRUE(matches("class X { class Z {}; };", ClassXHasNotClassY));
-  EXPECT_TRUE(notMatches("class X { class Y {}; class Z {}; };",
-                         ClassXHasNotClassY));
+  EXPECT_TRUE(
+      notMatches("class X { class Y {}; class Z {}; };", ClassXHasNotClassY));
 
   DeclarationMatcher NamedNotRecord =
-    namedDecl(hasName("Foo"), unless(recordDecl()));
+      namedDecl(hasName("Foo"), unless(recordDecl()));
   EXPECT_TRUE(matches("void Foo(){}", NamedNotRecord));
   EXPECT_TRUE(notMatches("struct Foo {};", NamedNotRecord));
 }
 
-TEST(CastExpression, HasCastKind) {
+TEST_P(ASTMatchersTest, HasCastKind) {
   EXPECT_TRUE(
       matches("char *p = 0;",
               traverse(ast_type_traits::TK_AsIs,
@@ -316,73 +310,72 @@ TEST(CastExpression, HasCastKind) {
                varDecl(has(implicitCastExpr(hasCastKind(CK_NullToPointer)))))));
 }
 
-TEST(DeclarationMatcher, HasDescendant) {
+TEST_P(ASTMatchersTest, HasDescendant) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `hasDescendant()` that does not depend on C++.
+    return;
+  }
+
   DeclarationMatcher ZDescendantClassX =
-    recordDecl(
-      hasDescendant(recordDecl(hasName("X"))),
-      hasName("Z"));
+      recordDecl(hasDescendant(recordDecl(hasName("X"))), hasName("Z"));
   EXPECT_TRUE(matches("class Z { class X {}; };", ZDescendantClassX));
   EXPECT_TRUE(
-    matches("class Z { class Y { class X {}; }; };", ZDescendantClassX));
-  EXPECT_TRUE(
-    matches("class Z { class A { class Y { class X {}; }; }; };",
-            ZDescendantClassX));
+      matches("class Z { class Y { class X {}; }; };", ZDescendantClassX));
+  EXPECT_TRUE(matches("class Z { class A { class Y { class X {}; }; }; };",
+                      ZDescendantClassX));
   EXPECT_TRUE(
-    matches("class Z { class A { class B { class Y { class X {}; }; }; }; };",
-            ZDescendantClassX));
+      matches("class Z { class A { class B { class Y { class X {}; }; }; }; };",
+              ZDescendantClassX));
   EXPECT_TRUE(notMatches("class Z {};", ZDescendantClassX));
 
-  DeclarationMatcher ZDescendantClassXHasClassY =
-    recordDecl(
-      hasDescendant(recordDecl(has(recordDecl(hasName("Y"))),
-                               hasName("X"))),
+  DeclarationMatcher ZDescendantClassXHasClassY = recordDecl(
+      hasDescendant(recordDecl(has(recordDecl(hasName("Y"))), hasName("X"))),
       hasName("Z"));
   EXPECT_TRUE(matches("class Z { class X { class Y {}; }; };",
                       ZDescendantClassXHasClassY));
   EXPECT_TRUE(
-    matches("class Z { class A { class B { class X { class Y {}; }; }; }; };",
-            ZDescendantClassXHasClassY));
-  EXPECT_TRUE(notMatches(
-    "class Z {"
-      "  class A {"
-      "    class B {"
-      "      class X {"
-      "        class C {"
-      "          class Y {};"
-      "        };"
-      "      };"
-      "    }; "
-      "  };"
-      "};", ZDescendantClassXHasClassY));
+      matches("class Z { class A { class B { class X { class Y {}; }; }; }; };",
+              ZDescendantClassXHasClassY));
+  EXPECT_TRUE(notMatches("class Z {"
+                         "  class A {"
+                         "    class B {"
+                         "      class X {"
+                         "        class C {"
+                         "          class Y {};"
+                         "        };"
+                         "      };"
+                         "    }; "
+                         "  };"
+                         "};",
+                         ZDescendantClassXHasClassY));
 
   DeclarationMatcher ZDescendantClassXDescendantClassY =
-    recordDecl(
-      hasDescendant(recordDecl(hasDescendant(recordDecl(hasName("Y"))),
-                               hasName("X"))),
-      hasName("Z"));
-  EXPECT_TRUE(
-    matches("class Z { class A { class X { class B { class Y {}; }; }; }; };",
-            ZDescendantClassXDescendantClassY));
-  EXPECT_TRUE(matches(
-    "class Z {"
-      "  class A {"
-      "    class X {"
-      "      class B {"
-      "        class Y {};"
-      "      };"
-      "      class Y {};"
-      "    };"
-      "  };"
-      "};", ZDescendantClassXDescendantClassY));
-}
-
-TEST(DeclarationMatcher, HasDescendantMemoization) {
+      recordDecl(hasDescendant(recordDecl(
+                     hasDescendant(recordDecl(hasName("Y"))), hasName("X"))),
+                 hasName("Z"));
+  EXPECT_TRUE(
+      matches("class Z { class A { class X { class B { class Y {}; }; }; }; };",
+              ZDescendantClassXDescendantClassY));
+  EXPECT_TRUE(matches("class Z {"
+                      "  class A {"
+                      "    class X {"
+                      "      class B {"
+                      "        class Y {};"
+                      "      };"
+                      "      class Y {};"
+                      "    };"
+                      "  };"
+                      "};",
+                      ZDescendantClassXDescendantClassY));
+}
+
+TEST_P(ASTMatchersTest, HasDescendant_Memoization) {
   DeclarationMatcher CannotMemoize =
-    decl(hasDescendant(typeLoc().bind("x")), has(decl()));
+      decl(hasDescendant(typeLoc().bind("x")), has(decl()));
   EXPECT_TRUE(matches("void f() { int i; }", CannotMemoize));
 }
 
-TEST(DeclarationMatcher, HasDescendantMemoizationUsesRestrictKind) {
+TEST_P(ASTMatchersTest, HasDescendant_MemoizationUsesRestrictKind) {
   auto Name = hasName("i");
   auto VD = internal::Matcher(Name).dynCastTo();
   auto RD = internal::Matcher(Name).dynCastTo();
@@ -396,44 +389,50 @@ TEST(DeclarationMatcher, HasDescendantMemoizationUsesRestrictKind) {
                       decl(anyOf(hasDescendant(RD), hasDescendant(VD)))));
 }
 
-TEST(DeclarationMatcher, HasAncestorMemoization) {
+TEST_P(ASTMatchersTest, HasAncestor_Memoization) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   // This triggers an hasAncestor with a TemplateArgument in the bound nodes.
   // That node can't be memoized so we have to check for it before trying to put
   // it on the cache.
   DeclarationMatcher CannotMemoize = classTemplateSpecializationDecl(
-    hasAnyTemplateArgument(templateArgument().bind("targ")),
-    forEach(fieldDecl(hasAncestor(forStmt()))));
+      hasAnyTemplateArgument(templateArgument().bind("targ")),
+      forEach(fieldDecl(hasAncestor(forStmt()))));
 
   EXPECT_TRUE(notMatches("template  struct S;"
-                           "template <> struct S{ int i; int j; };",
+                         "template <> struct S{ int i; int j; };",
                          CannotMemoize));
 }
 
-TEST(DeclarationMatcher, HasAttr) {
+TEST_P(ASTMatchersTest, HasAttr) {
   EXPECT_TRUE(matches("struct __attribute__((warn_unused)) X {};",
                       decl(hasAttr(clang::attr::WarnUnused))));
-  EXPECT_FALSE(matches("struct X {};",
-                       decl(hasAttr(clang::attr::WarnUnused))));
+  EXPECT_FALSE(matches("struct X {};", decl(hasAttr(clang::attr::WarnUnused))));
 }
 
+TEST_P(ASTMatchersTest, AnyOf) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `anyOf()` that does not depend on C++.
+    return;
+  }
 
-TEST(DeclarationMatcher, MatchAnyOf) {
   DeclarationMatcher YOrZDerivedFromX = cxxRecordDecl(
-    anyOf(hasName("Y"), allOf(isDerivedFrom("X"), hasName("Z"))));
+      anyOf(hasName("Y"), allOf(isDerivedFrom("X"), hasName("Z"))));
   EXPECT_TRUE(matches("class X {}; class Z : public X {};", YOrZDerivedFromX));
   EXPECT_TRUE(matches("class Y {};", YOrZDerivedFromX));
   EXPECT_TRUE(
-    notMatches("class X {}; class W : public X {};", YOrZDerivedFromX));
+      notMatches("class X {}; class W : public X {};", YOrZDerivedFromX));
   EXPECT_TRUE(notMatches("class Z {};", YOrZDerivedFromX));
 
   DeclarationMatcher XOrYOrZOrU =
-    recordDecl(anyOf(hasName("X"), hasName("Y"), hasName("Z"), hasName("U")));
+      recordDecl(anyOf(hasName("X"), hasName("Y"), hasName("Z"), hasName("U")));
   EXPECT_TRUE(matches("class X {};", XOrYOrZOrU));
   EXPECT_TRUE(notMatches("class V {};", XOrYOrZOrU));
 
-  DeclarationMatcher XOrYOrZOrUOrV =
-    recordDecl(anyOf(hasName("X"), hasName("Y"), hasName("Z"), hasName("U"),
-                     hasName("V")));
+  DeclarationMatcher XOrYOrZOrUOrV = recordDecl(anyOf(
+      hasName("X"), hasName("Y"), hasName("Z"), hasName("U"), hasName("V")));
   EXPECT_TRUE(matches("class X {};", XOrYOrZOrUOrV));
   EXPECT_TRUE(matches("class Y {};", XOrYOrZOrUOrV));
   EXPECT_TRUE(matches("class Z {};", XOrYOrZOrUOrV));
@@ -447,11 +446,15 @@ TEST(DeclarationMatcher, MatchAnyOf) {
   EXPECT_TRUE(notMatches("int F() { return 1; }", MixedTypes));
 
   EXPECT_TRUE(
-    matches("void f() try { } catch (int) { } catch (...) { }",
-            cxxCatchStmt(anyOf(hasDescendant(varDecl()), isCatchAll()))));
+      matches("void f() try { } catch (int) { } catch (...) { }",
+              cxxCatchStmt(anyOf(hasDescendant(varDecl()), isCatchAll()))));
 }
 
-TEST(DeclarationMatcher, ClassIsDerived) {
+TEST_P(ASTMatchersTest, IsDerivedFrom) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   DeclarationMatcher IsDerivedFromX = cxxRecordDecl(isDerivedFrom("X"));
 
   EXPECT_TRUE(matches("class X {}; class Y : public X {};", IsDerivedFromX));
@@ -460,19 +463,17 @@ TEST(DeclarationMatcher, ClassIsDerived) {
   EXPECT_TRUE(notMatches("class Y;", IsDerivedFromX));
   EXPECT_TRUE(notMatches("", IsDerivedFromX));
   EXPECT_TRUE(matches("class X {}; template class Y : Y, X {};",
-    IsDerivedFromX));
+                      IsDerivedFromX));
   EXPECT_TRUE(matches("class X {}; template class Y : X, Y {};",
-    IsDerivedFromX));
+                      IsDerivedFromX));
 
-  DeclarationMatcher IsZDerivedFromX = cxxRecordDecl(hasName("Z"),
-    isDerivedFrom("X"));
-  EXPECT_TRUE(
-    matches(
-      "class X {};"
-      "template class Y : Y {};"
-      "template<> class Y<0> : X {};"
-      "class Z : Y<1> {};",
-      IsZDerivedFromX));
+  DeclarationMatcher IsZDerivedFromX =
+      cxxRecordDecl(hasName("Z"), isDerivedFrom("X"));
+  EXPECT_TRUE(matches("class X {};"
+                      "template class Y : Y {};"
+                      "template<> class Y<0> : X {};"
+                      "class Z : Y<1> {};",
+                      IsZDerivedFromX));
 
   DeclarationMatcher IsDirectlyDerivedFromX =
       cxxRecordDecl(isDirectlyDerivedFrom("X"));
@@ -493,145 +494,138 @@ TEST(DeclarationMatcher, ClassIsDerived) {
   EXPECT_TRUE(notMatches("", IsAX));
 
   DeclarationMatcher ZIsDerivedFromX =
-    cxxRecordDecl(hasName("Z"), isDerivedFrom("X"));
+      cxxRecordDecl(hasName("Z"), isDerivedFrom("X"));
   DeclarationMatcher ZIsDirectlyDerivedFromX =
       cxxRecordDecl(hasName("Z"), isDirectlyDerivedFrom("X"));
   EXPECT_TRUE(
-    matches("class X {}; class Y : public X {}; class Z : public Y {};",
-            ZIsDerivedFromX));
+      matches("class X {}; class Y : public X {}; class Z : public Y {};",
+              ZIsDerivedFromX));
   EXPECT_TRUE(
       notMatches("class X {}; class Y : public X {}; class Z : public Y {};",
                  ZIsDirectlyDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {};"
-              "template class Y : public X {};"
-              "class Z : public Y {};", ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {};"
+                      "template class Y : public X {};"
+                      "class Z : public Y {};",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(notMatches("class X {};"
                          "template class Y : public X {};"
                          "class Z : public Y {};",
                          ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matches("class X {}; template class Z : public X {};",
                       ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class X {}; "
+                      "template class Z : public X {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class X {}; "
+                      "template class Z : public X {};",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class X {}; "
-              "template class Z : public X {};",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template class X {}; "
-              "template class Z : public X {};",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class A { class Z : public X {}; };",
-               ZIsDerivedFromX));
+      notMatches("template class A { class Z : public X {}; };",
+                 ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class A { public: class Z : public X {}; }; "
-              "class X{}; void y() { A::Z z; }", ZIsDerivedFromX));
+      matches("template class A { public: class Z : public X {}; }; "
+              "class X{}; void y() { A::Z z; }",
+              ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template  class X {}; "
+      matches("template  class X {}; "
               "template class A { class Z : public X {}; };",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class X> class A { "
-                 "  class Z : public X {}; };", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template class X> class A { "
-              "  public: class Z : public X {}; }; "
-              "template class X {}; void y() { A::Z z; }",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class A { class Z : public X::D {}; };",
-               ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template class A { public: "
-              "  class Z : public X::D {}; }; "
-              "class Y { public: class X {}; typedef X D; }; "
-              "void y() { A::Z z; }", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; typedef X Y; class Z : public Y {};",
-            ZIsDerivedFromX));
+              ZIsDerivedFromX));
+  EXPECT_TRUE(notMatches("template class X> class A { "
+                         "  class Z : public X {}; };",
+                         ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class X> class A { "
+                      "  public: class Z : public X {}; }; "
+                      "template class X {}; void y() { A::Z z; }",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class Y { typedef typename T::U X; "
-              "  class Z : public X {}; };", ZIsDerivedFromX));
-  EXPECT_TRUE(matches("class X {}; class Z : public ::X {};",
+      notMatches("template class A { class Z : public X::D {}; };",
+                 ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class A { public: "
+                      "  class Z : public X::D {}; }; "
+                      "class Y { public: class X {}; typedef X D; }; "
+                      "void y() { A::Z z; }",
                       ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; typedef X Y; class Z : public Y {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class Y { typedef typename T::U X; "
+                      "  class Z : public X {}; };",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; class Z : public ::X {};", ZIsDerivedFromX));
   EXPECT_TRUE(
-    notMatches("template class X {}; "
+      notMatches("template class X {}; "
                  "template class A { class Z : public X::D {}; };",
-               ZIsDerivedFromX));
+                 ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class X { public: typedef X D; }; "
+      matches("template class X { public: typedef X D; }; "
               "template class A { public: "
               "  class Z : public X::D {}; }; void y() { A::Z z; }",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class A { class Z : public X::D::E {}; };",
-               ZIsDerivedFromX));
+              ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("class X {}; typedef X V; typedef V W; class Z : public W {};",
-            ZIsDerivedFromX));
+      notMatches("template class A { class Z : public X::D::E {}; };",
+                 ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("class X {}; class Y : public X {}; "
-              "typedef Y V; typedef V W; class Z : public W {};",
-            ZIsDerivedFromX));
+      matches("class X {}; typedef X V; typedef V W; class Z : public W {};",
+              ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; class Y : public X {}; "
+                      "typedef Y V; typedef V W; class Z : public W {};",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(notMatches("class X {}; class Y : public X {}; "
                          "typedef Y V; typedef V W; class Z : public W {};",
                          ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(
-    matches("template class X {}; "
+      matches("template class X {}; "
               "template class A { class Z : public X {}; };",
-            ZIsDerivedFromX));
+              ZIsDerivedFromX));
   EXPECT_TRUE(
-    notMatches("template class D { typedef X A; typedef A B; "
+      notMatches("template class D { typedef X A; typedef A B; "
                  "  typedef B C; class Z : public C {}; };",
-               ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; typedef X A; typedef A B; "
-              "class Z : public B {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; typedef X A; typedef A B; typedef B C; "
-              "class Z : public C {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class U {}; typedef U X; typedef X V; "
-              "class Z : public V {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class Base {}; typedef Base X; "
-              "class Z : public Base {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class Base {}; typedef Base Base2; typedef Base2 X; "
-              "class Z : public Base {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("class Base {}; class Base2 {}; typedef Base2 X; "
-                 "class Z : public Base {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class A {}; typedef A X; typedef A Y; "
-              "class Z : public Y {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template  class Z;"
-                 "template <> class Z {};"
-                 "template  class Z : public Z {};",
-               IsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template  class X;"
-              "template <> class X {};"
-              "template  class X : public X {};",
-            IsDerivedFromX));
-  EXPECT_TRUE(matches(
-    "class X {};"
-      "template  class Z;"
-      "template <> class Z {};"
-      "template  class Z : public Z, public X {};",
-    ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template struct X;"
+                 ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; typedef X A; typedef A B; "
+                      "class Z : public B {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; typedef X A; typedef A B; typedef B C; "
+                      "class Z : public C {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class U {}; typedef U X; typedef X V; "
+                      "class Z : public V {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class Base {}; typedef Base X; "
+                      "class Z : public Base {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class Base {}; typedef Base Base2; typedef Base2 X; "
+                      "class Z : public Base {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(notMatches("class Base {}; class Base2 {}; typedef Base2 X; "
+                         "class Z : public Base {};",
+                         ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class A {}; typedef A X; typedef A Y; "
+                      "class Z : public Y {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(notMatches("template  class Z;"
+                         "template <> class Z {};"
+                         "template  class Z : public Z {};",
+                         IsDerivedFromX));
+  EXPECT_TRUE(matches("template  class X;"
+                      "template <> class X {};"
+                      "template  class X : public X {};",
+                      IsDerivedFromX));
+  EXPECT_TRUE(
+      matches("class X {};"
+              "template  class Z;"
+              "template <> class Z {};"
+              "template  class Z : public Z, public X {};",
+              ZIsDerivedFromX));
+  EXPECT_TRUE(
+      notMatches("template struct X;"
                  "template struct X : public X {};",
-               cxxRecordDecl(isDerivedFrom(recordDecl(hasName("Some"))))));
+                 cxxRecordDecl(isDerivedFrom(recordDecl(hasName("Some"))))));
   EXPECT_TRUE(matches(
-    "struct A {};"
+      "struct A {};"
       "template struct X;"
       "template struct X : public X {};"
       "template<> struct X<0> : public A {};"
       "struct B : public X<42> {};",
-    cxxRecordDecl(hasName("B"), isDerivedFrom(recordDecl(hasName("A"))))));
+      cxxRecordDecl(hasName("B"), isDerivedFrom(recordDecl(hasName("A"))))));
   EXPECT_TRUE(notMatches(
       "struct A {};"
       "template struct X;"
@@ -645,7 +639,7 @@ TEST(DeclarationMatcher, ClassIsDerived) {
   // get rid of the Variable(...) matching and match the right template
   // declarations directly.
   const char *RecursiveTemplateOneParameter =
-    "class Base1 {}; class Base2 {};"
+      "class Base1 {}; class Base2 {};"
       "template  class Z;"
       "template <> class Z : public Base1 {};"
       "template <> class Z : public Base2 {};"
@@ -654,21 +648,21 @@ TEST(DeclarationMatcher, ClassIsDerived) {
       "template  class Z : public Z, public Z {};"
       "void f() { Z z_float; Z z_double; Z z_char; }";
   EXPECT_TRUE(matches(
-    RecursiveTemplateOneParameter,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
+      RecursiveTemplateOneParameter,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
   EXPECT_TRUE(notMatches(
-    RecursiveTemplateOneParameter,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
-  EXPECT_TRUE(matches(
-    RecursiveTemplateOneParameter,
-    varDecl(hasName("z_char"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1"),
-                                                 isDerivedFrom("Base2")))))));
+      RecursiveTemplateOneParameter,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
+  EXPECT_TRUE(
+      matches(RecursiveTemplateOneParameter,
+              varDecl(hasName("z_char"),
+                      hasInitializer(hasType(cxxRecordDecl(
+                          isDerivedFrom("Base1"), isDerivedFrom("Base2")))))));
 
   const char *RecursiveTemplateTwoParameters =
-    "class Base1 {}; class Base2 {};"
+      "class Base1 {}; class Base2 {};"
       "template  class Z;"
       "template  class Z : public Base1 {};"
       "template  class Z : public Base2 {};"
@@ -679,44 +673,45 @@ TEST(DeclarationMatcher, ClassIsDerived) {
       "void f() { Z z_float; Z z_double; "
       "           Z z_char; }";
   EXPECT_TRUE(matches(
-    RecursiveTemplateTwoParameters,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
+      RecursiveTemplateTwoParameters,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
   EXPECT_TRUE(notMatches(
-    RecursiveTemplateTwoParameters,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
-  EXPECT_TRUE(matches(
-    RecursiveTemplateTwoParameters,
-    varDecl(hasName("z_char"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1"),
-                                                 isDerivedFrom("Base2")))))));
-  EXPECT_TRUE(matches(
-    "namespace ns { class X {}; class Y : public X {}; }",
-    cxxRecordDecl(isDerivedFrom("::ns::X"))));
-  EXPECT_TRUE(notMatches(
-    "class X {}; class Y : public X {};",
-    cxxRecordDecl(isDerivedFrom("::ns::X"))));
+      RecursiveTemplateTwoParameters,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
+  EXPECT_TRUE(
+      matches(RecursiveTemplateTwoParameters,
+              varDecl(hasName("z_char"),
+                      hasInitializer(hasType(cxxRecordDecl(
+                          isDerivedFrom("Base1"), isDerivedFrom("Base2")))))));
+  EXPECT_TRUE(matches("namespace ns { class X {}; class Y : public X {}; }",
+                      cxxRecordDecl(isDerivedFrom("::ns::X"))));
+  EXPECT_TRUE(notMatches("class X {}; class Y : public X {};",
+                         cxxRecordDecl(isDerivedFrom("::ns::X"))));
 
   EXPECT_TRUE(matches(
-    "class X {}; class Y : public X {};",
-    cxxRecordDecl(isDerivedFrom(recordDecl(hasName("X")).bind("test")))));
+      "class X {}; class Y : public X {};",
+      cxxRecordDecl(isDerivedFrom(recordDecl(hasName("X")).bind("test")))));
 
-  EXPECT_TRUE(matches(
-    "template class X {};"
-      "template using Z = X;"
-      "template  class Y : Z {};",
-    cxxRecordDecl(isDerivedFrom(namedDecl(hasName("X"))))));
+  EXPECT_TRUE(matches("template class X {};"
+                      "template using Z = X;"
+                      "template  class Y : Z {};",
+                      cxxRecordDecl(isDerivedFrom(namedDecl(hasName("X"))))));
 }
 
-TEST(DeclarationMatcher, IsDerivedFromEmptyName) {
+TEST_P(ASTMatchersTest, IsDerivedFrom_EmptyName) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   const char *const Code = "class X {}; class Y : public X {};";
   EXPECT_TRUE(notMatches(Code, cxxRecordDecl(isDerivedFrom(""))));
   EXPECT_TRUE(notMatches(Code, cxxRecordDecl(isDirectlyDerivedFrom(""))));
   EXPECT_TRUE(notMatches(Code, cxxRecordDecl(isSameOrDerivedFrom(""))));
 }
 
-TEST(DeclarationMatcher, ObjCClassIsDerived) {
+TEST_P(ASTMatchersTest, IsDerivedFrom_ObjC) {
   DeclarationMatcher IsDerivedFromX = objcInterfaceDecl(isDerivedFrom("X"));
   EXPECT_TRUE(
       matchesObjC("@interface X @end @interface Y : X @end", IsDerivedFromX));
@@ -737,24 +732,24 @@ TEST(DeclarationMatcher, ObjCClassIsDerived) {
 
   DeclarationMatcher IsDirectlyDerivedFromX =
       objcInterfaceDecl(isDirectlyDerivedFrom("X"));
-  EXPECT_TRUE(
-      matchesObjC("@interface X @end @interface Y : X @end", IsDirectlyDerivedFromX));
+  EXPECT_TRUE(matchesObjC("@interface X @end @interface Y : X @end",
+                          IsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface X @end @interface Y<__covariant ObjectType> : X @end",
       IsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface X @end @compatibility_alias Y X; @interface Z : Y @end",
       IsDirectlyDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface X @end typedef X Y; @interface Z : Y @end",
-      IsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface X @end typedef X Y; @interface Z : Y @end",
+                  IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@interface X @end", IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@class X;", IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@class Y;", IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@interface X @end @compatibility_alias Y X;",
                              IsDirectlyDerivedFromX));
-  EXPECT_TRUE(notMatchesObjC("@interface X @end typedef X Y;",
-                             IsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      notMatchesObjC("@interface X @end typedef X Y;", IsDirectlyDerivedFromX));
 
   DeclarationMatcher IsAX = objcInterfaceDecl(isSameOrDerivedFrom("X"));
   EXPECT_TRUE(matchesObjC("@interface X @end @interface Y : X @end", IsAX));
@@ -775,9 +770,9 @@ TEST(DeclarationMatcher, ObjCClassIsDerived) {
                           ZIsDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface X @end typedef X Y; @interface Z : Y @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface X @end typedef X Y; @interface Z : Y @end",
-      ZIsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface X @end typedef X Y; @interface Z : Y @end",
+                  ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface A @end typedef A X; typedef A Y; @interface Z : Y @end",
       ZIsDerivedFromX));
@@ -798,123 +793,141 @@ TEST(DeclarationMatcher, ObjCClassIsDerived) {
       ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface A @end @compatibility_alias X A; @compatibility_alias Y A;"
-      "@interface Z : Y @end", ZIsDerivedFromX));
+      "@interface Z : Y @end",
+      ZIsDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface A @end @compatibility_alias X A; @compatibility_alias Y A;"
-      "@interface Z : Y @end", ZIsDirectlyDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface Y @end typedef Y X; @interface Z : X @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface Y @end typedef Y X; @interface Z : X @end",
+      "@interface Z : Y @end",
       ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
-      "@interface A @end @compatibility_alias Y A; typedef Y X;"
-      "@interface Z : A @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface A @end @compatibility_alias Y A; typedef Y X;"
-      "@interface Z : A @end", ZIsDirectlyDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface A @end typedef A Y; @compatibility_alias X Y;"
-      "@interface Z : A @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface A @end typedef A Y; @compatibility_alias X Y;"
-      "@interface Z : A @end", ZIsDirectlyDerivedFromX));
+      "@interface Y @end typedef Y X; @interface Z : X @end", ZIsDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface Y @end typedef Y X; @interface Z : X @end",
+                  ZIsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end @compatibility_alias Y A; typedef Y X;"
+                  "@interface Z : A @end",
+                  ZIsDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end @compatibility_alias Y A; typedef Y X;"
+                  "@interface Z : A @end",
+                  ZIsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end typedef A Y; @compatibility_alias X Y;"
+                  "@interface Z : A @end",
+                  ZIsDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end typedef A Y; @compatibility_alias X Y;"
+                  "@interface Z : A @end",
+                  ZIsDirectlyDerivedFromX));
 }
 
-TEST(DeclarationMatcher, IsLambda) {
+TEST_P(ASTMatchersTest, IsLambda) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   const auto IsLambda = cxxMethodDecl(ofClass(cxxRecordDecl(isLambda())));
   EXPECT_TRUE(matches("auto x = []{};", IsLambda));
   EXPECT_TRUE(notMatches("struct S { void operator()() const; };", IsLambda));
 }
 
-TEST(Matcher, BindMatchedNodes) {
+TEST_P(ASTMatchersTest, Bind) {
   DeclarationMatcher ClassX = has(recordDecl(hasName("::X")).bind("x"));
 
-  EXPECT_TRUE(matchAndVerifyResultTrue("class X {};",
-                                       ClassX, std::make_unique>("x")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class X {};", ClassX,
+      std::make_unique>("x")));
 
-  EXPECT_TRUE(matchAndVerifyResultFalse("class X {};",
-                                        ClassX, std::make_unique>("other-id")));
+  EXPECT_TRUE(matchAndVerifyResultFalse(
+      "class X {};", ClassX,
+      std::make_unique>("other-id")));
 
   TypeMatcher TypeAHasClassB = hasDeclaration(
-    recordDecl(hasName("A"), has(recordDecl(hasName("B")).bind("b"))));
+      recordDecl(hasName("A"), has(recordDecl(hasName("B")).bind("b"))));
 
-  EXPECT_TRUE(matchAndVerifyResultTrue("class A { public: A *a; class B {}; };",
-                                       TypeAHasClassB,
-                                       std::make_unique>("b")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class A { public: A *a; class B {}; };", TypeAHasClassB,
+      std::make_unique>("b")));
 
   StatementMatcher MethodX =
-    callExpr(callee(cxxMethodDecl(hasName("x")))).bind("x");
+      callExpr(callee(cxxMethodDecl(hasName("x")))).bind("x");
 
-  EXPECT_TRUE(matchAndVerifyResultTrue("class A { void x() { x(); } };",
-                                       MethodX,
-                                       std::make_unique>("x")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class A { void x() { x(); } };", MethodX,
+      std::make_unique>("x")));
 }
 
-TEST(Matcher, BindTheSameNameInAlternatives) {
+TEST_P(ASTMatchersTest, Bind_SameNameInAlternatives) {
   StatementMatcher matcher = anyOf(
-    binaryOperator(hasOperatorName("+"),
-                   hasLHS(expr().bind("x")),
-                   hasRHS(integerLiteral(equals(0)))),
-    binaryOperator(hasOperatorName("+"),
-                   hasLHS(integerLiteral(equals(0))),
-                   hasRHS(expr().bind("x"))));
+      binaryOperator(hasOperatorName("+"), hasLHS(expr().bind("x")),
+                     hasRHS(integerLiteral(equals(0)))),
+      binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0))),
+                     hasRHS(expr().bind("x"))));
 
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    // The first branch of the matcher binds x to 0 but then fails.
-    // The second branch binds x to f() and succeeds.
-    "int f() { return 0 + f(); }",
-    matcher,
-    std::make_unique>("x")));
+      // The first branch of the matcher binds x to 0 but then fails.
+      // The second branch binds x to f() and succeeds.
+      "int f() { return 0 + f(); }", matcher,
+      std::make_unique>("x")));
 }
 
-TEST(Matcher, BindsIDForMemoizedResults) {
+TEST_P(ASTMatchersTest, Bind_BindsIDForMemoizedResults) {
   // Using the same matcher in two match expressions will make memoization
   // kick in.
   DeclarationMatcher ClassX = recordDecl(hasName("X")).bind("x");
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { class B { class X {}; }; };",
-    DeclarationMatcher(anyOf(
-      recordDecl(hasName("A"), hasDescendant(ClassX)),
-      recordDecl(hasName("B"), hasDescendant(ClassX)))),
-    std::make_unique>("x", 2)));
+      "class A { class B { class X {}; }; };",
+      DeclarationMatcher(
+          anyOf(recordDecl(hasName("A"), hasDescendant(ClassX)),
+                recordDecl(hasName("B"), hasDescendant(ClassX)))),
+      std::make_unique>("x", 2)));
 }
 
-TEST(HasType, MatchesAsString) {
+TEST_P(ASTMatchersTest, HasType_MatchesAsString) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `hasType()` that does not depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() {Y* y; y->x(); }",
-            cxxMemberCallExpr(on(hasType(asString("class Y *"))))));
+      matches("class Y { public: void x(); }; void z() {Y* y; y->x(); }",
+              cxxMemberCallExpr(on(hasType(asString("class Y *"))))));
   EXPECT_TRUE(
-    matches("class X { void x(int x) {} };",
-            cxxMethodDecl(hasParameter(0, hasType(asString("int"))))));
+      matches("class X { void x(int x) {} };",
+              cxxMethodDecl(hasParameter(0, hasType(asString("int"))))));
   EXPECT_TRUE(matches("namespace ns { struct A {}; }  struct B { ns::A a; };",
                       fieldDecl(hasType(asString("ns::A")))));
-  EXPECT_TRUE(matches("namespace { struct A {}; }  struct B { A a; };",
-                      fieldDecl(hasType(asString("struct (anonymous namespace)::A")))));
+  EXPECT_TRUE(
+      matches("namespace { struct A {}; }  struct B { A a; };",
+              fieldDecl(hasType(asString("struct (anonymous namespace)::A")))));
 }
 
-TEST(Matcher, HasOperatorNameForOverloadedOperatorCall) {
+TEST_P(ASTMatchersTest, HasOverloadedOperatorName) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   StatementMatcher OpCallAndAnd =
-    cxxOperatorCallExpr(hasOverloadedOperatorName("&&"));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("&&"));
   EXPECT_TRUE(matches("class Y { }; "
-                        "bool operator&&(Y x, Y y) { return true; }; "
-                        "Y a; Y b; bool c = a && b;", OpCallAndAnd));
+                      "bool operator&&(Y x, Y y) { return true; }; "
+                      "Y a; Y b; bool c = a && b;",
+                      OpCallAndAnd));
   StatementMatcher OpCallLessLess =
-    cxxOperatorCallExpr(hasOverloadedOperatorName("<<"));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("<<"));
   EXPECT_TRUE(notMatches("class Y { }; "
-                           "bool operator&&(Y x, Y y) { return true; }; "
-                           "Y a; Y b; bool c = a && b;",
+                         "bool operator&&(Y x, Y y) { return true; }; "
+                         "Y a; Y b; bool c = a && b;",
                          OpCallLessLess));
   StatementMatcher OpStarCall =
-    cxxOperatorCallExpr(hasOverloadedOperatorName("*"));
-  EXPECT_TRUE(matches("class Y; int operator*(Y &); void f(Y &y) { *y; }",
-                      OpStarCall));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("*"));
+  EXPECT_TRUE(
+      matches("class Y; int operator*(Y &); void f(Y &y) { *y; }", OpStarCall));
   DeclarationMatcher ClassWithOpStar =
-    cxxRecordDecl(hasMethod(hasOverloadedOperatorName("*")));
-  EXPECT_TRUE(matches("class Y { int operator*(); };",
-                      ClassWithOpStar));
-  EXPECT_TRUE(notMatches("class Y { void myOperator(); };",
-                         ClassWithOpStar)) ;
+      cxxRecordDecl(hasMethod(hasOverloadedOperatorName("*")));
+  EXPECT_TRUE(matches("class Y { int operator*(); };", ClassWithOpStar));
+  EXPECT_TRUE(notMatches("class Y { void myOperator(); };", ClassWithOpStar));
   DeclarationMatcher AnyOpStar = functionDecl(hasOverloadedOperatorName("*"));
   EXPECT_TRUE(matches("class Y; int operator*(Y &);", AnyOpStar));
   EXPECT_TRUE(matches("class Y { int operator*(); };", AnyOpStar));
@@ -926,38 +939,43 @@ TEST(Matcher, HasOperatorNameForOverloadedOperatorCall) {
   EXPECT_TRUE(matches("class Y { Y operator&&(Y &); };", AnyAndOp));
 }
 
+TEST_P(ASTMatchersTest, HasOverloadedOperatorName_MatchesNestedCalls) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
 
-TEST(Matcher, NestedOverloadedOperatorCalls) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class Y { }; "
+      "class Y { }; "
       "Y& operator&&(Y& x, Y& y) { return x; }; "
       "Y a; Y b; Y c; Y d = a && b && c;",
-    cxxOperatorCallExpr(hasOverloadedOperatorName("&&")).bind("x"),
-    std::make_unique>("x", 2)));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("&&")).bind("x"),
+      std::make_unique>("x", 2)));
   EXPECT_TRUE(matches("class Y { }; "
-                        "Y& operator&&(Y& x, Y& y) { return x; }; "
-                        "Y a; Y b; Y c; Y d = a && b && c;",
+                      "Y& operator&&(Y& x, Y& y) { return x; }; "
+                      "Y a; Y b; Y c; Y d = a && b && c;",
                       cxxOperatorCallExpr(hasParent(cxxOperatorCallExpr()))));
   EXPECT_TRUE(
-    matches("class Y { }; "
+      matches("class Y { }; "
               "Y& operator&&(Y& x, Y& y) { return x; }; "
               "Y a; Y b; Y c; Y d = a && b && c;",
-            cxxOperatorCallExpr(hasDescendant(cxxOperatorCallExpr()))));
+              cxxOperatorCallExpr(hasDescendant(cxxOperatorCallExpr()))));
 }
 
-TEST(Matcher, VarDecl_Storage) {
+TEST_P(ASTMatchersTest, HasLocalStorage) {
   auto M = varDecl(hasName("X"), hasLocalStorage());
   EXPECT_TRUE(matches("void f() { int X; }", M));
   EXPECT_TRUE(notMatches("int X;", M));
   EXPECT_TRUE(notMatches("void f() { static int X; }", M));
+}
 
-  M = varDecl(hasName("X"), hasGlobalStorage());
+TEST_P(ASTMatchersTest, HasGlobalStorage) {
+  auto M = varDecl(hasName("X"), hasGlobalStorage());
   EXPECT_TRUE(notMatches("void f() { int X; }", M));
   EXPECT_TRUE(matches("int X;", M));
   EXPECT_TRUE(matches("void f() { static int X; }", M));
 }
 
-TEST(Matcher, VarDecl_IsStaticLocal) {
+TEST_P(ASTMatchersTest, IsStaticLocal) {
   auto M = varDecl(isStaticLocal());
   EXPECT_TRUE(matches("void f() { static int X; }", M));
   EXPECT_TRUE(notMatches("static int X;", M));
@@ -965,15 +983,15 @@ TEST(Matcher, VarDecl_IsStaticLocal) {
   EXPECT_TRUE(notMatches("int X;", M));
 }
 
-TEST(Matcher, VarDecl_StorageDuration) {
+TEST_P(ASTMatchersTest, StorageDuration) {
   StringRef T =
       "void f() { int x; static int y; } int a;static int b;extern int c;";
 
   EXPECT_TRUE(matches(T, varDecl(hasName("x"), hasAutomaticStorageDuration())));
   EXPECT_TRUE(
-    notMatches(T, varDecl(hasName("y"), hasAutomaticStorageDuration())));
+      notMatches(T, varDecl(hasName("y"), hasAutomaticStorageDuration())));
   EXPECT_TRUE(
-    notMatches(T, varDecl(hasName("a"), hasAutomaticStorageDuration())));
+      notMatches(T, varDecl(hasName("a"), hasAutomaticStorageDuration())));
 
   EXPECT_TRUE(matches(T, varDecl(hasName("y"), hasStaticStorageDuration())));
   EXPECT_TRUE(matches(T, varDecl(hasName("a"), hasStaticStorageDuration())));
@@ -981,86 +999,103 @@ TEST(Matcher, VarDecl_StorageDuration) {
   EXPECT_TRUE(matches(T, varDecl(hasName("c"), hasStaticStorageDuration())));
   EXPECT_TRUE(notMatches(T, varDecl(hasName("x"), hasStaticStorageDuration())));
 
-  // FIXME: It is really hard to test with thread_local itself because not all
-  // targets support TLS, which causes this to be an error depending on what
-  // platform the test is being run on. We do not have access to the TargetInfo
-  // object to be able to test whether the platform supports TLS or not.
+  // FIXME: Add thread_local variables to the source code snippet.
   EXPECT_TRUE(notMatches(T, varDecl(hasName("x"), hasThreadStorageDuration())));
   EXPECT_TRUE(notMatches(T, varDecl(hasName("y"), hasThreadStorageDuration())));
   EXPECT_TRUE(notMatches(T, varDecl(hasName("a"), hasThreadStorageDuration())));
 }
 
-TEST(Matcher, FindsVarDeclInFunctionParameter) {
-  EXPECT_TRUE(matches(
-    "void f(int i) {}",
-    varDecl(hasName("i"))));
+TEST_P(ASTMatchersTest, VarDecl_MatchesFunctionParameter) {
+  EXPECT_TRUE(matches("void f(int i) {}", varDecl(hasName("i"))));
 }
 
-TEST(UnaryExpressionOrTypeTraitExpression, MatchesCorrectType) {
-  EXPECT_TRUE(matches("void x() { int a = sizeof(a); }", sizeOfExpr(
-    hasArgumentOfType(asString("int")))));
-  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }", sizeOfExpr(
-    hasArgumentOfType(asString("float")))));
+TEST_P(ASTMatchersTest, SizeOfExpr_MatchesCorrectType) {
+  EXPECT_TRUE(matches("void x() { int a = sizeof(a); }",
+                      sizeOfExpr(hasArgumentOfType(asString("int")))));
+  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }",
+                         sizeOfExpr(hasArgumentOfType(asString("float")))));
   EXPECT_TRUE(matches(
-    "struct A {}; void x() { A a; int b = sizeof(a); }",
-    sizeOfExpr(hasArgumentOfType(hasDeclaration(recordDecl(hasName("A")))))));
-  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }", sizeOfExpr(
-    hasArgumentOfType(hasDeclaration(recordDecl(hasName("string")))))));
+      "struct A {}; void x() { struct A a; int b = sizeof(a); }",
+      sizeOfExpr(hasArgumentOfType(hasDeclaration(recordDecl(hasName("A")))))));
+  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }",
+                         sizeOfExpr(hasArgumentOfType(
+                             hasDeclaration(recordDecl(hasName("string")))))));
 }
 
-TEST(IsInteger, MatchesIntegers) {
+TEST_P(ASTMatchersTest, IsInteger_MatchesIntegers) {
   EXPECT_TRUE(matches("int i = 0;", varDecl(hasType(isInteger()))));
-  EXPECT_TRUE(matches(
-    "long long i = 0; void f(long long) { }; void g() {f(i);}",
-    callExpr(hasArgument(0, declRefExpr(
-      to(varDecl(hasType(isInteger()))))))));
+  EXPECT_TRUE(
+      matches("long long i = 0; void f(long long) { }; void g() {f(i);}",
+              callExpr(hasArgument(
+                  0, declRefExpr(to(varDecl(hasType(isInteger()))))))));
 }
 
-TEST(IsInteger, ReportsNoFalsePositives) {
+TEST_P(ASTMatchersTest, IsInteger_ReportsNoFalsePositives) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a similar negative test for `isInteger()` that does not depend
+    // on C++.
+    return;
+  }
+
   EXPECT_TRUE(notMatches("int *i;", varDecl(hasType(isInteger()))));
-  EXPECT_TRUE(notMatches("struct T {}; T t; void f(T *) { }; void g() {f(&t);}",
-                         callExpr(hasArgument(0, declRefExpr(
-                           to(varDecl(hasType(isInteger()))))))));
+  EXPECT_TRUE(
+      notMatches("struct T {}; T t; void f(T *) { }; void g() {f(&t);}",
+                 callExpr(hasArgument(
+                     0, declRefExpr(to(varDecl(hasType(isInteger()))))))));
 }
 
-TEST(IsSignedInteger, MatchesSignedIntegers) {
+TEST_P(ASTMatchersTest, IsSignedInteger_MatchesSignedIntegers) {
   EXPECT_TRUE(matches("int i = 0;", varDecl(hasType(isSignedInteger()))));
-  EXPECT_TRUE(notMatches("unsigned i = 0;",
-                         varDecl(hasType(isSignedInteger()))));
+  EXPECT_TRUE(
+      notMatches("unsigned i = 0;", varDecl(hasType(isSignedInteger()))));
 }
 
-TEST(IsUnsignedInteger, MatchesUnsignedIntegers) {
+TEST_P(ASTMatchersTest, IsUnsignedInteger_MatchesUnsignedIntegers) {
   EXPECT_TRUE(notMatches("int i = 0;", varDecl(hasType(isUnsignedInteger()))));
-  EXPECT_TRUE(matches("unsigned i = 0;",
-                      varDecl(hasType(isUnsignedInteger()))));
+  EXPECT_TRUE(
+      matches("unsigned i = 0;", varDecl(hasType(isUnsignedInteger()))));
 }
 
-TEST(IsAnyPointer, MatchesPointers) {
+TEST_P(ASTMatchersTest, IsAnyPointer_MatchesPointers) {
+  if (!GetParam().isCXX11OrLater()) {
+    // FIXME: Add a test for `isAnyPointer()` that does not depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(matches("int* i = nullptr;", varDecl(hasType(isAnyPointer()))));
 }
 
-TEST(IsAnyPointer, MatchesObjcPointer) {
+TEST_P(ASTMatchersTest, IsAnyPointer_MatchesObjcPointer) {
   EXPECT_TRUE(matchesObjC("@interface Foo @end Foo *f;",
                           varDecl(hasType(isAnyPointer()))));
 }
 
-TEST(IsAnyPointer, ReportsNoFalsePositives) {
+TEST_P(ASTMatchersTest, IsAnyPointer_ReportsNoFalsePositives) {
   EXPECT_TRUE(notMatches("int i = 0;", varDecl(hasType(isAnyPointer()))));
 }
 
-TEST(IsAnyCharacter, MatchesCharacters) {
+TEST_P(ASTMatchersTest, IsAnyCharacter_MatchesCharacters) {
   EXPECT_TRUE(matches("char i = 0;", varDecl(hasType(isAnyCharacter()))));
 }
 
-TEST(IsAnyCharacter, ReportsNoFalsePositives) {
+TEST_P(ASTMatchersTest, IsAnyCharacter_ReportsNoFalsePositives) {
   EXPECT_TRUE(notMatches("int i;", varDecl(hasType(isAnyCharacter()))));
 }
 
-TEST(IsArrow, MatchesMemberVariablesViaArrow) {
+TEST_P(ASTMatchersTest, IsArrow_MatchesMemberVariablesViaArrow) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `isArrow()` that does not depend on C++.
+    return;
+  }
+  if (GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("class Y { void x() { this->y; } int y; };",
                       memberExpr(isArrow())));
-  EXPECT_TRUE(matches("class Y { void x() { y; } int y; };",
-                      memberExpr(isArrow())));
+  EXPECT_TRUE(
+      matches("class Y { void x() { y; } int y; };", memberExpr(isArrow())));
   EXPECT_TRUE(notMatches("class Y { void x() { (*this).y; } int y; };",
                          memberExpr(isArrow())));
   EXPECT_TRUE(matches("template  class Y { void x() { this->m; } };",
@@ -1070,7 +1105,12 @@ TEST(IsArrow, MatchesMemberVariablesViaArrow) {
                  cxxDependentScopeMemberExpr(isArrow())));
 }
 
-TEST(IsArrow, MatchesStaticMemberVariablesViaArrow) {
+TEST_P(ASTMatchersTest, IsArrow_MatchesStaticMemberVariablesViaArrow) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `isArrow()` that does not depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(matches("class Y { void x() { this->y; } static int y; };",
                       memberExpr(isArrow())));
   EXPECT_TRUE(notMatches("class Y { void x() { y; } static int y; };",
@@ -1079,11 +1119,19 @@ TEST(IsArrow, MatchesStaticMemberVariablesViaArrow) {
                          memberExpr(isArrow())));
 }
 
-TEST(IsArrow, MatchesMemberCallsViaArrow) {
-  EXPECT_TRUE(matches("class Y { void x() { this->x(); } };",
-                      memberExpr(isArrow())));
-  EXPECT_TRUE(matches("class Y { void x() { x(); } };",
-                      memberExpr(isArrow())));
+TEST_P(ASTMatchersTest, IsArrow_MatchesMemberCallsViaArrow) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `isArrow()` that does not depend on C++.
+    return;
+  }
+  if (GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
+  EXPECT_TRUE(
+      matches("class Y { void x() { this->x(); } };", memberExpr(isArrow())));
+  EXPECT_TRUE(matches("class Y { void x() { x(); } };", memberExpr(isArrow())));
   EXPECT_TRUE(notMatches("class Y { void x() { Y y; y.x(); } };",
                          memberExpr(isArrow())));
   EXPECT_TRUE(
@@ -1096,79 +1144,121 @@ TEST(IsArrow, MatchesMemberCallsViaArrow) {
                  unresolvedMemberExpr(isArrow())));
 }
 
-TEST(ConversionDeclaration, IsExplicit) {
+TEST_P(ASTMatchersTest, IsExplicit_CXXConversionDecl) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("struct S { explicit operator int(); };",
                       cxxConversionDecl(isExplicit())));
   EXPECT_TRUE(notMatches("struct S { operator int(); };",
                          cxxConversionDecl(isExplicit())));
+}
+
+TEST_P(ASTMatchersTest, IsExplicit_CXXConversionDecl_CXX20) {
+  if (!GetParam().isCXX20OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(
       notMatches("template<bool b> struct S { explicit(b) operator int(); };",
-                 cxxConversionDecl(isExplicit()), langCxx20OrLater()));
+                 cxxConversionDecl(isExplicit())));
   EXPECT_TRUE(matches("struct S { explicit(true) operator int(); };",
-                      cxxConversionDecl(isExplicit()), langCxx20OrLater()));
+                      cxxConversionDecl(isExplicit())));
   EXPECT_TRUE(notMatches("struct S { explicit(false) operator int(); };",
-                         cxxConversionDecl(isExplicit()), langCxx20OrLater()));
+                         cxxConversionDecl(isExplicit())));
 }
 
-TEST(Matcher, ArgumentCount) {
+TEST_P(ASTMatchersTest, ArgumentCountIs_CallExpr) {
   StatementMatcher Call1Arg = callExpr(argumentCountIs(1));
 
   EXPECT_TRUE(matches("void x(int) { x(0); }", Call1Arg));
-  EXPECT_TRUE(matches("class X { void x(int) { x(0); } };", Call1Arg));
   EXPECT_TRUE(notMatches("void x(int, int) { x(0, 0); }", Call1Arg));
 }
 
-TEST(Matcher, ParameterCount) {
+TEST_P(ASTMatchersTest, ArgumentCountIs_CallExpr_CXX) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  StatementMatcher Call1Arg = callExpr(argumentCountIs(1));
+  EXPECT_TRUE(matches("class X { void x(int) { x(0); } };", Call1Arg));
+}
+
+TEST_P(ASTMatchersTest, ParameterCountIs) {
   DeclarationMatcher Function1Arg = functionDecl(parameterCountIs(1));
   EXPECT_TRUE(matches("void f(int i) {}", Function1Arg));
-  EXPECT_TRUE(matches("class X { void f(int i) {} };", Function1Arg));
   EXPECT_TRUE(notMatches("void f() {}", Function1Arg));
   EXPECT_TRUE(notMatches("void f(int i, int j, int k) {}", Function1Arg));
   EXPECT_TRUE(matches("void f(int i, ...) {};", Function1Arg));
 }
 
-TEST(Matcher, References) {
-  DeclarationMatcher ReferenceClassX = varDecl(
-    hasType(references(recordDecl(hasName("X")))));
-  EXPECT_TRUE(matches("class X {}; void y(X y) { X &x = y; }",
-                      ReferenceClassX));
+TEST_P(ASTMatchersTest, ParameterCountIs_CXX) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  DeclarationMatcher Function1Arg = functionDecl(parameterCountIs(1));
+  EXPECT_TRUE(matches("class X { void f(int i) {} };", Function1Arg));
+}
+
+TEST_P(ASTMatchersTest, References) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `references()` that does not depend on C++.
+    return;
+  }
+
+  DeclarationMatcher ReferenceClassX =
+      varDecl(hasType(references(recordDecl(hasName("X")))));
   EXPECT_TRUE(
-    matches("class X {}; void y(X y) { const X &x = y; }", ReferenceClassX));
+      matches("class X {}; void y(X y) { X &x = y; }", ReferenceClassX));
+  EXPECT_TRUE(
+      matches("class X {}; void y(X y) { const X &x = y; }", ReferenceClassX));
   // The match here is on the implicit copy constructor code for
   // class X, not on code 'X x = y'.
+  EXPECT_TRUE(matches("class X {}; void y(X y) { X x = y; }", ReferenceClassX));
+  EXPECT_TRUE(notMatches("class X {}; extern X x;", ReferenceClassX));
   EXPECT_TRUE(
-    matches("class X {}; void y(X y) { X x = y; }", ReferenceClassX));
-  EXPECT_TRUE(
-    notMatches("class X {}; extern X x;", ReferenceClassX));
-  EXPECT_TRUE(
-    notMatches("class X {}; void y(X *y) { X *&x = y; }", ReferenceClassX));
+      notMatches("class X {}; void y(X *y) { X *&x = y; }", ReferenceClassX));
 }
 
-TEST(QualType, hasLocalQualifiers) {
+TEST_P(ASTMatchersTest, HasLocalQualifiers) {
+  if (!GetParam().isCXX11OrLater()) {
+    // FIXME: Add a test for `hasLocalQualifiers()` that does not depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(notMatches("typedef const int const_int; const_int i = 1;",
                          varDecl(hasType(hasLocalQualifiers()))));
   EXPECT_TRUE(matches("int *const j = nullptr;",
                       varDecl(hasType(hasLocalQualifiers()))));
-  EXPECT_TRUE(matches("int *volatile k;",
-                      varDecl(hasType(hasLocalQualifiers()))));
-  EXPECT_TRUE(notMatches("int m;",
-                         varDecl(hasType(hasLocalQualifiers()))));
+  EXPECT_TRUE(
+      matches("int *volatile k;", varDecl(hasType(hasLocalQualifiers()))));
+  EXPECT_TRUE(notMatches("int m;", varDecl(hasType(hasLocalQualifiers()))));
 }
 
-TEST(IsExternC, MatchesExternCFunctionDeclarations) {
+TEST_P(ASTMatchersTest, IsExternC_MatchesExternCFunctionDeclarations) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("extern \"C\" void f() {}", functionDecl(isExternC())));
-  EXPECT_TRUE(matches("extern \"C\" { void f() {} }",
-                      functionDecl(isExternC())));
+  EXPECT_TRUE(
+      matches("extern \"C\" { void f() {} }", functionDecl(isExternC())));
   EXPECT_TRUE(notMatches("void f() {}", functionDecl(isExternC())));
 }
 
-TEST(IsExternC, MatchesExternCVariableDeclarations) {
+TEST_P(ASTMatchersTest, IsExternC_MatchesExternCVariableDeclarations) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("extern \"C\" int i;", varDecl(isExternC())));
   EXPECT_TRUE(matches("extern \"C\" { int i; }", varDecl(isExternC())));
   EXPECT_TRUE(notMatches("int i;", varDecl(isExternC())));
 }
 
-TEST(IsStaticStorageClass, MatchesStaticDeclarations) {
+TEST_P(ASTMatchersTest, IsStaticStorageClass) {
   EXPECT_TRUE(
       matches("static void f() {}", functionDecl(isStaticStorageClass())));
   EXPECT_TRUE(matches("static int i = 1;", varDecl(isStaticStorageClass())));
@@ -1177,69 +1267,117 @@ TEST(IsStaticStorageClass, MatchesStaticDeclarations) {
   EXPECT_TRUE(notMatches("void f() {}", functionDecl(isStaticStorageClass())));
 }
 
-TEST(IsDefaulted, MatchesDefaultedFunctionDeclarations) {
+TEST_P(ASTMatchersTest, IsDefaulted) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("class A { ~A(); };",
                          functionDecl(hasName("~A"), isDefaulted())));
   EXPECT_TRUE(matches("class B { ~B() = default; };",
                       functionDecl(hasName("~B"), isDefaulted())));
 }
 
-TEST(IsDeleted, MatchesDeletedFunctionDeclarations) {
+TEST_P(ASTMatchersTest, IsDeleted) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
-    notMatches("void Func();", functionDecl(hasName("Func"), isDeleted())));
+      notMatches("void Func();", functionDecl(hasName("Func"), isDeleted())));
   EXPECT_TRUE(matches("void Func() = delete;",
                       functionDecl(hasName("Func"), isDeleted())));
 }
 
-TEST(IsNoThrow, MatchesNoThrowFunctionDeclarations) {
+TEST_P(ASTMatchersTest, IsNoThrow_DynamicExceptionSpec) {
+  if (!GetParam().supportsCXXDynamicExceptionSpecification()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("void f();", functionDecl(isNoThrow())));
   EXPECT_TRUE(notMatches("void f() throw(int);", functionDecl(isNoThrow())));
-  EXPECT_TRUE(
-    notMatches("void f() noexcept(false);", functionDecl(isNoThrow())));
   EXPECT_TRUE(matches("void f() throw();", functionDecl(isNoThrow())));
-  EXPECT_TRUE(matches("void f() noexcept;", functionDecl(isNoThrow())));
 
   EXPECT_TRUE(notMatches("void f();", functionProtoType(isNoThrow())));
-  EXPECT_TRUE(notMatches("void f() throw(int);", functionProtoType(isNoThrow())));
   EXPECT_TRUE(
-    notMatches("void f() noexcept(false);", functionProtoType(isNoThrow())));
+      notMatches("void f() throw(int);", functionProtoType(isNoThrow())));
   EXPECT_TRUE(matches("void f() throw();", functionProtoType(isNoThrow())));
+}
+
+TEST_P(ASTMatchersTest, IsNoThrow_CXX11) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      notMatches("void f() noexcept(false);", functionDecl(isNoThrow())));
+  EXPECT_TRUE(matches("void f() noexcept;", functionDecl(isNoThrow())));
+
+  EXPECT_TRUE(
+      notMatches("void f() noexcept(false);", functionProtoType(isNoThrow())));
   EXPECT_TRUE(matches("void f() noexcept;", functionProtoType(isNoThrow())));
 }
 
-TEST(isConstexpr, MatchesConstexprDeclarations) {
+TEST_P(ASTMatchersTest, IsConstexpr) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("constexpr int foo = 42;",
                       varDecl(hasName("foo"), isConstexpr())));
   EXPECT_TRUE(matches("constexpr int bar();",
                       functionDecl(hasName("bar"), isConstexpr())));
-  EXPECT_TRUE(matches("void baz() { if constexpr(1 > 0) {} }",
-                      ifStmt(isConstexpr()), langCxx17OrLater()));
-  EXPECT_TRUE(notMatches("void baz() { if (1 > 0) {} }", ifStmt(isConstexpr()),
-                         langCxx17OrLater()));
 }
 
-TEST(hasInitStatement, MatchesSelectionInitializers) {
-  EXPECT_TRUE(matches("void baz() { if (int i = 1; i > 0) {} }",
-                      ifStmt(hasInitStatement(anything())),
-                      langCxx17OrLater()));
-  EXPECT_TRUE(notMatches("void baz() { if (int i = 1) {} }",
-                         ifStmt(hasInitStatement(anything()))));
+TEST_P(ASTMatchersTest, IsConstexpr_MatchesIfConstexpr) {
+  if (!GetParam().isCXX17OrLater()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      matches("void baz() { if constexpr(1 > 0) {} }", ifStmt(isConstexpr())));
+  EXPECT_TRUE(
+      notMatches("void baz() { if (1 > 0) {} }", ifStmt(isConstexpr())));
+}
+
+TEST_P(ASTMatchersTest, HasInitStatement_MatchesSelectionInitializers) {
   EXPECT_TRUE(notMatches("void baz() { if (1 > 0) {} }",
                          ifStmt(hasInitStatement(anything()))));
-  EXPECT_TRUE(
-      matches("void baz(int i) { switch (int j = i; j) { default: break; } }",
-              switchStmt(hasInitStatement(anything())), langCxx17OrLater()));
   EXPECT_TRUE(notMatches("void baz(int i) { switch (i) { default: break; } }",
                          switchStmt(hasInitStatement(anything()))));
 }
 
-TEST(hasInitStatement, MatchesRangeForInitializers) {
+TEST_P(ASTMatchersTest, HasInitStatement_MatchesSelectionInitializers_CXX) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(notMatches("void baz() { if (int i = 1) {} }",
+                         ifStmt(hasInitStatement(anything()))));
+}
+
+TEST_P(ASTMatchersTest, HasInitStatement_MatchesSelectionInitializers_CXX17) {
+  if (!GetParam().isCXX17OrLater()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("void baz() { if (int i = 1; i > 0) {} }",
+                      ifStmt(hasInitStatement(anything()))));
+  EXPECT_TRUE(
+      matches("void baz(int i) { switch (int j = i; j) { default: break; } }",
+              switchStmt(hasInitStatement(anything()))));
+}
+
+TEST_P(ASTMatchersTest, HasInitStatement_MatchesRangeForInitializers) {
+  if (!GetParam().isCXX20OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("void baz() {"
                       "int items[] = {};"
                       "for (auto &arr = items; auto &item : arr) {}"
                       "}",
-                      cxxForRangeStmt(hasInitStatement(anything())),
-                      langCxx20OrLater()));
+                      cxxForRangeStmt(hasInitStatement(anything()))));
   EXPECT_TRUE(notMatches("void baz() {"
                          "int items[] = {};"
                          "for (auto &item : items) {}"
@@ -1247,46 +1385,62 @@ TEST(hasInitStatement, MatchesRangeForInitializers) {
                          cxxForRangeStmt(hasInitStatement(anything()))));
 }
 
-TEST(TemplateArgumentCountIs, Matches) {
+TEST_P(ASTMatchersTest, TemplateArgumentCountIs) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
-    matches("template<typename T> struct C {}; C<int> c;",
-            classTemplateSpecializationDecl(templateArgumentCountIs(1))));
+      matches("template<typename T> struct C {}; C<int> c;",
+              classTemplateSpecializationDecl(templateArgumentCountIs(1))));
   EXPECT_TRUE(
-    notMatches("template<typename T> struct C {}; C<int> c;",
-               classTemplateSpecializationDecl(templateArgumentCountIs(2))));
+      notMatches("template<typename T> struct C {}; C<int> c;",
+                 classTemplateSpecializationDecl(templateArgumentCountIs(2))));
 
   EXPECT_TRUE(matches("template<typename T> struct C {}; C<int> c;",
                       templateSpecializationType(templateArgumentCountIs(1))));
   EXPECT_TRUE(
-    notMatches("template<typename T> struct C {}; C<int> c;",
-               templateSpecializationType(templateArgumentCountIs(2))));
+      notMatches("template<typename T> struct C {}; C<int> c;",
+                 templateSpecializationType(templateArgumentCountIs(2))));
 }
 
-TEST(IsIntegral, Matches) {
-  EXPECT_TRUE(matches("template<int T> struct C {}; C<42> c;",
-                      classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(isIntegral()))));
+TEST_P(ASTMatchersTest, IsIntegral) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches(
+      "template<int T> struct C {}; C<42> c;",
+      classTemplateSpecializationDecl(hasAnyTemplateArgument(isIntegral()))));
   EXPECT_TRUE(notMatches("template<typename T> struct C {}; C<int> c;",
                          classTemplateSpecializationDecl(hasAnyTemplateArgument(
-                           templateArgument(isIntegral())))));
+                             templateArgument(isIntegral())))));
 }
 
-TEST(EqualsIntegralValue, Matches) {
+TEST_P(ASTMatchersTest, EqualsIntegralValue) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("template<int T> struct C {}; C<42> c;",
                       classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(equalsIntegralValue("42")))));
+                          hasAnyTemplateArgument(equalsIntegralValue("42")))));
   EXPECT_TRUE(matches("template<int T> struct C {}; C<-42> c;",
                       classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(equalsIntegralValue("-42")))));
+                          hasAnyTemplateArgument(equalsIntegralValue("-42")))));
   EXPECT_TRUE(matches("template<int T> struct C {}; C<-0042> c;",
                       classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(equalsIntegralValue("-34")))));
+                          hasAnyTemplateArgument(equalsIntegralValue("-34")))));
   EXPECT_TRUE(notMatches("template<int T> struct C {}; C<42> c;",
                          classTemplateSpecializationDecl(hasAnyTemplateArgument(
-                           equalsIntegralValue("0042")))));
+                             equalsIntegralValue("0042")))));
 }
 
-TEST(Matcher, MatchesAccessSpecDecls) {
+TEST_P(ASTMatchersTest, AccessSpecDecl) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class C { public: int i; };", accessSpecDecl()));
   EXPECT_TRUE(
       matches("class C { public: int i; };", accessSpecDecl(isPublic())));
@@ -1298,38 +1452,58 @@ TEST(Matcher, MatchesAccessSpecDecls) {
   EXPECT_TRUE(notMatches("class C { int i; };", accessSpecDecl()));
 }
 
-TEST(Matcher, MatchesFinal) {
+TEST_P(ASTMatchersTest, IsFinal) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class X final {};", cxxRecordDecl(isFinal())));
   EXPECT_TRUE(matches("class X { virtual void f() final; };",
                       cxxMethodDecl(isFinal())));
   EXPECT_TRUE(notMatches("class X {};", cxxRecordDecl(isFinal())));
   EXPECT_TRUE(
-    notMatches("class X { virtual void f(); };", cxxMethodDecl(isFinal())));
+      notMatches("class X { virtual void f(); };", cxxMethodDecl(isFinal())));
 }
 
-TEST(Matcher, MatchesVirtualMethod) {
+TEST_P(ASTMatchersTest, IsVirtual) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class X { virtual int f(); };",
                       cxxMethodDecl(isVirtual(), hasName("::X::f"))));
   EXPECT_TRUE(notMatches("class X { int f(); };", cxxMethodDecl(isVirtual())));
 }
 
-TEST(Matcher, MatchesVirtualAsWrittenMethod) {
+TEST_P(ASTMatchersTest, IsVirtualAsWritten) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class A { virtual int f(); };"
-                        "class B : public A { int f(); };",
+                      "class B : public A { int f(); };",
                       cxxMethodDecl(isVirtualAsWritten(), hasName("::A::f"))));
   EXPECT_TRUE(
-    notMatches("class A { virtual int f(); };"
+      notMatches("class A { virtual int f(); };"
                  "class B : public A { int f(); };",
-               cxxMethodDecl(isVirtualAsWritten(), hasName("::B::f"))));
+                 cxxMethodDecl(isVirtualAsWritten(), hasName("::B::f"))));
 }
 
-TEST(Matcher, MatchesPureMethod) {
+TEST_P(ASTMatchersTest, IsPure) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class X { virtual int f() = 0; };",
                       cxxMethodDecl(isPure(), hasName("::X::f"))));
   EXPECT_TRUE(notMatches("class X { int f(); };", cxxMethodDecl(isPure())));
 }
 
-TEST(Matcher, MatchesCopyAssignmentOperator) {
+TEST_P(ASTMatchersTest, IsCopyAssignmentOperator) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   auto CopyAssignment =
       cxxMethodDecl(isCopyAssignmentOperator(), unless(isImplicit()));
   EXPECT_TRUE(matches("class X { X &operator=(X); };", CopyAssignment));
@@ -1342,7 +1516,11 @@ TEST(Matcher, MatchesCopyAssignmentOperator) {
   EXPECT_TRUE(notMatches("class X { X &operator=(X &&); };", CopyAssignment));
 }
 
-TEST(Matcher, MatchesMoveAssignmentOperator) {
+TEST_P(ASTMatchersTest, IsMoveAssignmentOperator) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   auto MoveAssignment =
       cxxMethodDecl(isMoveAssignmentOperator(), unless(isImplicit()));
   EXPECT_TRUE(notMatches("class X { X &operator=(X); };", MoveAssignment));
@@ -1356,93 +1534,108 @@ TEST(Matcher, MatchesMoveAssignmentOperator) {
   EXPECT_TRUE(notMatches("class X { X &operator=(X &); };", MoveAssignment));
 }
 
-TEST(Matcher, MatchesConstMethod) {
+TEST_P(ASTMatchersTest, IsConst) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
-    matches("struct A { void foo() const; };", cxxMethodDecl(isConst())));
+      matches("struct A { void foo() const; };", cxxMethodDecl(isConst())));
   EXPECT_TRUE(
-    notMatches("struct A { void foo(); };", cxxMethodDecl(isConst())));
+      notMatches("struct A { void foo(); };", cxxMethodDecl(isConst())));
 }
 
-TEST(Matcher, MatchesOverridingMethod) {
+TEST_P(ASTMatchersTest, IsOverride) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class X { virtual int f(); }; "
-                        "class Y : public X { int f(); };",
+                      "class Y : public X { int f(); };",
                       cxxMethodDecl(isOverride(), hasName("::Y::f"))));
   EXPECT_TRUE(notMatches("class X { virtual int f(); }; "
-                           "class Y : public X { int f(); };",
+                         "class Y : public X { int f(); };",
                          cxxMethodDecl(isOverride(), hasName("::X::f"))));
   EXPECT_TRUE(notMatches("class X { int f(); }; "
-                           "class Y : public X { int f(); };",
+                         "class Y : public X { int f(); };",
                          cxxMethodDecl(isOverride())));
   EXPECT_TRUE(notMatches("class X { int f(); int f(int); }; ",
                          cxxMethodDecl(isOverride())));
   EXPECT_TRUE(
-    matches("template <typename Base> struct Y : Base { void f() override;};",
-            cxxMethodDecl(isOverride(), hasName("::Y::f"))));
+      matches("template <typename Base> struct Y : Base { void f() override;};",
+              cxxMethodDecl(isOverride(), hasName("::Y::f"))));
 }
 
-TEST(Matcher, ConstructorArgument) {
+TEST_P(ASTMatchersTest, HasArgument_CXXConstructorDecl) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   auto Constructor = traverse(
       ast_type_traits::TK_AsIs,
       cxxConstructExpr(hasArgument(0, declRefExpr(to(varDecl(hasName("y")))))));
 
+  EXPECT_TRUE(matches(
+      "class X { public: X(int); }; void x() { int y; X x(y); }", Constructor));
   EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { int y; X x(y); }",
-            Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { int y; X x = X(y); }",
-            Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { int y; X x = y; }",
-            Constructor));
+      matches("class X { public: X(int); }; void x() { int y; X x = X(y); }",
+              Constructor));
   EXPECT_TRUE(
-    notMatches("class X { public: X(int); }; void x() { int z; X x(z); }",
-               Constructor));
+      matches("class X { public: X(int); }; void x() { int y; X x = y; }",
+              Constructor));
+  EXPECT_TRUE(notMatches(
+      "class X { public: X(int); }; void x() { int z; X x(z); }", Constructor));
 
   StatementMatcher WrongIndex =
       traverse(ast_type_traits::TK_AsIs,
                cxxConstructExpr(
                    hasArgument(42, declRefExpr(to(varDecl(hasName("y")))))));
-  EXPECT_TRUE(
-    notMatches("class X { public: X(int); }; void x() { int y; X x(y); }",
-               WrongIndex));
+  EXPECT_TRUE(notMatches(
+      "class X { public: X(int); }; void x() { int y; X x(y); }", WrongIndex));
 }
 
-TEST(Matcher, ConstructorArgumentCount) {
+TEST_P(ASTMatchersTest, ArgumentCountIs_CXXConstructExpr) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   auto Constructor1Arg =
       traverse(ast_type_traits::TK_AsIs, cxxConstructExpr(argumentCountIs(1)));
 
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x(0); }",
+                      Constructor1Arg));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x = X(0); }",
+                      Constructor1Arg));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x = 0; }",
+                      Constructor1Arg));
   EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x(0); }",
-            Constructor1Arg));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x = X(0); }",
-            Constructor1Arg));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x = 0; }",
-            Constructor1Arg));
-  EXPECT_TRUE(
-    notMatches("class X { public: X(int, int); }; void x() { X x(0, 0); }",
-               Constructor1Arg));
+      notMatches("class X { public: X(int, int); }; void x() { X x(0, 0); }",
+                 Constructor1Arg));
 }
 
-TEST(Matcher, ConstructorListInitialization) {
+TEST_P(ASTMatchersTest, IsListInitialization) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   auto ConstructorListInit =
       traverse(ast_type_traits::TK_AsIs,
                varDecl(has(cxxConstructExpr(isListInitialization()))));
 
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x{0}; }",
-            ConstructorListInit));
-  EXPECT_FALSE(
-    matches("class X { public: X(int); }; void x() { X x(0); }",
-            ConstructorListInit));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x{0}; }",
+                      ConstructorListInit));
+  EXPECT_FALSE(matches("class X { public: X(int); }; void x() { X x(0); }",
+                       ConstructorListInit));
 }
 
-TEST(ConstructorDeclaration, IsImplicit) {
+TEST_P(ASTMatchersTest, IsImplicit_CXXConstructorDecl) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   // This one doesn't match because the constructor is not added by the
   // compiler (it is not needed).
-  EXPECT_TRUE(notMatches("class Foo { };",
-                         cxxConstructorDecl(isImplicit())));
+  EXPECT_TRUE(notMatches("class Foo { };", cxxConstructorDecl(isImplicit())));
   // The compiler added the implicit default constructor.
   EXPECT_TRUE(matches("class Foo { }; Foo* f = new Foo();",
                       cxxConstructorDecl(isImplicit())));
@@ -1453,44 +1646,68 @@ TEST(ConstructorDeclaration, IsImplicit) {
                       cxxMethodDecl(isImplicit(), hasName("operator="))));
 }
 
-TEST(ConstructorDeclaration, IsExplicit) {
+TEST_P(ASTMatchersTest, IsExplicit_CXXConstructorDecl) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("struct S { explicit S(int); };",
                       cxxConstructorDecl(isExplicit())));
-  EXPECT_TRUE(notMatches("struct S { S(int); };",
-                         cxxConstructorDecl(isExplicit())));
+  EXPECT_TRUE(
+      notMatches("struct S { S(int); };", cxxConstructorDecl(isExplicit())));
+}
+
+TEST_P(ASTMatchersTest, IsExplicit_CXXConstructorDecl_CXX20) {
+  if (!GetParam().isCXX20OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template<bool b> struct S { explicit(b) S(int);};",
-                         cxxConstructorDecl(isExplicit()), langCxx20OrLater()));
+                         cxxConstructorDecl(isExplicit())));
   EXPECT_TRUE(matches("struct S { explicit(true) S(int);};",
-                      cxxConstructorDecl(isExplicit()), langCxx20OrLater()));
+                      cxxConstructorDecl(isExplicit())));
   EXPECT_TRUE(notMatches("struct S { explicit(false) S(int);};",
-                         cxxConstructorDecl(isExplicit()), langCxx20OrLater()));
+                         cxxConstructorDecl(isExplicit())));
 }
 
-TEST(DeductionGuideDeclaration, IsExplicit) {
+TEST_P(ASTMatchersTest, IsExplicit_CXXDeductionGuideDecl) {
+  if (!GetParam().isCXX17OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template<typename T> struct S { S(int);};"
                          "S(int) -> S<int>;",
-                         cxxDeductionGuideDecl(isExplicit()),
-                         langCxx17OrLater()));
+                         cxxDeductionGuideDecl(isExplicit())));
   EXPECT_TRUE(matches("template<typename T> struct S { S(int);};"
                       "explicit S(int) -> S<int>;",
-                      cxxDeductionGuideDecl(isExplicit()), langCxx17OrLater()));
+                      cxxDeductionGuideDecl(isExplicit())));
+}
+
+TEST_P(ASTMatchersTest, IsExplicit_CXXDeductionGuideDecl_CXX20) {
+  if (!GetParam().isCXX20OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("template<typename T> struct S { S(int);};"
                       "explicit(true) S(int) -> S<int>;",
-                      cxxDeductionGuideDecl(isExplicit()), langCxx20OrLater()));
+                      cxxDeductionGuideDecl(isExplicit())));
   EXPECT_TRUE(notMatches("template<typename T> struct S { S(int);};"
                          "explicit(false) S(int) -> S<int>;",
-                         cxxDeductionGuideDecl(isExplicit()),
-                         langCxx20OrLater()));
+                         cxxDeductionGuideDecl(isExplicit())));
   EXPECT_TRUE(
       notMatches("template<typename T> struct S { S(int);};"
                  "template<bool b = true> explicit(b) S(int) -> S<int>;",
-                 cxxDeductionGuideDecl(isExplicit()), langCxx20OrLater()));
+                 cxxDeductionGuideDecl(isExplicit())));
 }
 
-TEST(ConstructorDeclaration, Kinds) {
-  EXPECT_TRUE(matches(
-      "struct S { S(); };",
-      cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit()))));
+TEST_P(ASTMatchersTest, CXXConstructorDecl_Kinds) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      matches("struct S { S(); };", cxxConstructorDecl(isDefaultConstructor(),
+                                                       unless(isImplicit()))));
   EXPECT_TRUE(notMatches(
       "struct S { S(); };",
       cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
@@ -1501,9 +1718,9 @@ TEST(ConstructorDeclaration, Kinds) {
   EXPECT_TRUE(notMatches(
       "struct S { S(const S&); };",
       cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit()))));
-  EXPECT_TRUE(matches(
-      "struct S { S(const S&); };",
-      cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
+  EXPECT_TRUE(
+      matches("struct S { S(const S&); };",
+              cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
   EXPECT_TRUE(notMatches(
       "struct S { S(const S&); };",
       cxxConstructorDecl(isMoveConstructor(), unless(isImplicit()))));
@@ -1514,12 +1731,16 @@ TEST(ConstructorDeclaration, Kinds) {
   EXPECT_TRUE(notMatches(
       "struct S { S(S&&); };",
       cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
-  EXPECT_TRUE(matches(
-      "struct S { S(S&&); };",
-      cxxConstructorDecl(isMoveConstructor(), unless(isImplicit()))));
+  EXPECT_TRUE(
+      matches("struct S { S(S&&); };",
+              cxxConstructorDecl(isMoveConstructor(), unless(isImplicit()))));
 }
 
-TEST(ConstructorDeclaration, IsUserProvided) {
+TEST_P(ASTMatchersTest, IsUserProvided) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("struct S { int X = 0; };",
                          cxxConstructorDecl(isUserProvided())));
   EXPECT_TRUE(notMatches("struct S { S() = default; };",
@@ -1527,36 +1748,53 @@ TEST(ConstructorDeclaration, IsUserProvided) {
   EXPECT_TRUE(notMatches("struct S { S() = delete; };",
                          cxxConstructorDecl(isUserProvided())));
   EXPECT_TRUE(
-    matches("struct S { S(); };", cxxConstructorDecl(isUserProvided())));
+      matches("struct S { S(); };", cxxConstructorDecl(isUserProvided())));
   EXPECT_TRUE(matches("struct S { S(); }; S::S(){}",
                       cxxConstructorDecl(isUserProvided())));
 }
 
-TEST(ConstructorDeclaration, IsDelegatingConstructor) {
+TEST_P(ASTMatchersTest, IsDelegatingConstructor) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("struct S { S(); S(int); int X; };",
                          cxxConstructorDecl(isDelegatingConstructor())));
   EXPECT_TRUE(notMatches("struct S { S(){} S(int X) : X(X) {} int X; };",
                          cxxConstructorDecl(isDelegatingConstructor())));
   EXPECT_TRUE(matches(
-    "struct S { S() : S(0) {} S(int X) : X(X) {} int X; };",
-    cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(0))));
+      "struct S { S() : S(0) {} S(int X) : X(X) {} int X; };",
+      cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(0))));
   EXPECT_TRUE(matches(
-    "struct S { S(); S(int X); int X; }; S::S(int X) : S() {}",
-    cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(1))));
+      "struct S { S(); S(int X); int X; }; S::S(int X) : S() {}",
+      cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(1))));
 }
 
-TEST(StringLiteral, HasSize) {
+TEST_P(ASTMatchersTest, HasSize) {
   StatementMatcher Literal = stringLiteral(hasSize(4));
   EXPECT_TRUE(matches("const char *s = \"abcd\";", Literal));
-  // wide string
-  EXPECT_TRUE(matches("const wchar_t *s = L\"abcd\";", Literal));
   // with escaped characters
   EXPECT_TRUE(matches("const char *s = \"\x05\x06\x07\x08\";", Literal));
   // no matching, too small
   EXPECT_TRUE(notMatches("const char *s = \"ab\";", Literal));
 }
 
-TEST(Matcher, HasNameSupportsNamespaces) {
+TEST_P(ASTMatchersTest, HasSize_CXX) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Fix this test to also work in non-C++ language modes.
+    return;
+  }
+
+  StatementMatcher Literal = stringLiteral(hasSize(4));
+  // wide string
+  EXPECT_TRUE(matches("const wchar_t *s = L\"abcd\";", Literal));
+}
+
+TEST_P(ASTMatchersTest, HasName_MatchesNamespaces) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("namespace a { namespace b { class C; } }",
                       recordDecl(hasName("a::b::C"))));
   EXPECT_TRUE(matches("namespace a { namespace b { class C; } }",
@@ -1583,42 +1821,40 @@ TEST(Matcher, HasNameSupportsNamespaces) {
                          recordDecl(hasName("C"))));
 }
 
-TEST(Matcher, HasNameSupportsOuterClasses) {
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("A::B::C"))));
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("::A::B::C"))));
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("B::C"))));
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("c::B::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("A::c::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("A::B::A"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("::C"))));
+TEST_P(ASTMatchersTest, HasName_MatchesOuterClasses) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("class A { class B { class C; }; };",
+                      recordDecl(hasName("A::B::C"))));
+  EXPECT_TRUE(matches("class A { class B { class C; }; };",
+                      recordDecl(hasName("::A::B::C"))));
+  EXPECT_TRUE(matches("class A { class B { class C; }; };",
+                      recordDecl(hasName("B::C"))));
   EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("::B::C"))));
+      matches("class A { class B { class C; }; };", recordDecl(hasName("C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("c::B::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("A::c::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("A::B::A"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("::B::C"))));
   EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
                          recordDecl(hasName("z::A::B::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("A+B::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("A+B::C"))));
 }
 
-TEST(Matcher, HasNameSupportsInlinedNamespaces) {
+TEST_P(ASTMatchersTest, HasName_MatchesInlinedNamespaces) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   StringRef code = "namespace a { inline namespace b { class C; } }";
   EXPECT_TRUE(matches(code, recordDecl(hasName("a::b::C"))));
   EXPECT_TRUE(matches(code, recordDecl(hasName("a::C"))));
@@ -1626,17 +1862,25 @@ TEST(Matcher, HasNameSupportsInlinedNamespaces) {
   EXPECT_TRUE(matches(code, recordDecl(hasName("::a::C"))));
 }
 
-TEST(Matcher, HasNameSupportsAnonymousNamespaces) {
+TEST_P(ASTMatchersTest, HasName_MatchesAnonymousNamespaces) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   StringRef code = "namespace a { namespace { class C; } }";
   EXPECT_TRUE(
-    matches(code, recordDecl(hasName("a::(anonymous namespace)::C"))));
+      matches(code, recordDecl(hasName("a::(anonymous namespace)::C"))));
   EXPECT_TRUE(matches(code, recordDecl(hasName("a::C"))));
   EXPECT_TRUE(
-    matches(code, recordDecl(hasName("::a::(anonymous namespace)::C"))));
+      matches(code, recordDecl(hasName("::a::(anonymous namespace)::C"))));
   EXPECT_TRUE(matches(code, recordDecl(hasName("::a::C"))));
 }
 
-TEST(Matcher, HasNameSupportsAnonymousOuterClasses) {
+TEST_P(ASTMatchersTest, HasName_MatchesAnonymousOuterClasses) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("class A { class { class C; } x; };",
                       recordDecl(hasName("A::(anonymous class)::C"))));
   EXPECT_TRUE(matches("class A { class { class C; } x; };",
@@ -1651,7 +1895,11 @@ TEST(Matcher, HasNameSupportsAnonymousOuterClasses) {
                        recordDecl(hasName("::A::C"))));
 }
 
-TEST(Matcher, HasNameSupportsFunctionScope) {
+TEST_P(ASTMatchersTest, HasName_MatchesFunctionScope) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   StringRef code =
       "namespace a { void F(int a) { struct S { int m; }; int i; } }";
   EXPECT_TRUE(matches(code, varDecl(hasName("i"))));
@@ -1664,7 +1912,11 @@ TEST(Matcher, HasNameSupportsFunctionScope) {
   EXPECT_TRUE(matches(code, fieldDecl(hasName("::a::F(int)::S::m"))));
 }
 
-TEST(Matcher, HasNameQualifiedSupportsLinkage) {
+TEST_P(ASTMatchersTest, HasName_QualifiedStringMatchesThroughLinkage) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   // https://bugs.llvm.org/show_bug.cgi?id=42193
   StringRef code = R"cpp(namespace foo { extern "C" void test(); })cpp";
   EXPECT_TRUE(matches(code, functionDecl(hasName("test"))));
@@ -1679,7 +1931,12 @@ TEST(Matcher, HasNameQualifiedSupportsLinkage) {
   EXPECT_TRUE(notMatches(code, functionDecl(hasName("::test"))));
 }
 
-TEST(Matcher, HasAnyName) {
+TEST_P(ASTMatchersTest, HasAnyName) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `hasAnyName()` that does not depend on C++.
+    return;
+  }
+
   StringRef Code = "namespace a { namespace b { class C; } }";
 
   EXPECT_TRUE(matches(Code, recordDecl(hasAnyName("XX", "a::b::C"))));
@@ -1689,38 +1946,49 @@ TEST(Matcher, HasAnyName) {
 
   EXPECT_TRUE(notMatches(Code, recordDecl(hasAnyName("::C", "::b::C"))));
   EXPECT_TRUE(
-    matches(Code, recordDecl(hasAnyName("::C", "::b::C", "::a::b::C"))));
+      matches(Code, recordDecl(hasAnyName("::C", "::b::C", "::a::b::C"))));
 
   std::vector<StringRef> Names = {"::C", "::b::C", "::a::b::C"};
   EXPECT_TRUE(matches(Code, recordDecl(hasAnyName(Names))));
 }
 
-TEST(Matcher, IsDefinition) {
+TEST_P(ASTMatchersTest, IsDefinition) {
   DeclarationMatcher DefinitionOfClassA =
-    recordDecl(hasName("A"), isDefinition());
-  EXPECT_TRUE(matches("class A {};", DefinitionOfClassA));
-  EXPECT_TRUE(notMatches("class A;", DefinitionOfClassA));
+      recordDecl(hasName("A"), isDefinition());
+  EXPECT_TRUE(matches("struct A {};", DefinitionOfClassA));
+  EXPECT_TRUE(notMatches("struct A;", DefinitionOfClassA));
 
   DeclarationMatcher DefinitionOfVariableA =
-    varDecl(hasName("a"), isDefinition());
+      varDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matches("int a;", DefinitionOfVariableA));
   EXPECT_TRUE(notMatches("extern int a;", DefinitionOfVariableA));
+}
+
+TEST_P(ASTMatchersTest, IsDefinition_CXX) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
 
   DeclarationMatcher DefinitionOfMethodA =
-    cxxMethodDecl(hasName("a"), isDefinition());
+      cxxMethodDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matches("class A { void a() {} };", DefinitionOfMethodA));
   EXPECT_TRUE(notMatches("class A { void a(); };", DefinitionOfMethodA));
 
   DeclarationMatcher DefinitionOfObjCMethodA =
-    objcMethodDecl(hasName("a"), isDefinition());
+      objcMethodDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matchesObjC("@interface A @end "
                           "@implementation A; -(void)a {} @end",
                           DefinitionOfObjCMethodA));
-  EXPECT_TRUE(notMatchesObjC("@interface A; - (void)a; @end",
-                             DefinitionOfObjCMethodA));
+  EXPECT_TRUE(
+      notMatchesObjC("@interface A; - (void)a; @end", DefinitionOfObjCMethodA));
 }
 
-TEST(Matcher, HandlesNullQualTypes) {
+TEST_P(ASTMatchersTest, HandlesNullQualTypes) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add an equivalent test that does not depend on C++.
+    return;
+  }
+
   // FIXME: Add a Type matcher so we can replace uses of this
   // variable with Type(True())
   const TypeMatcher AnyType = anything();
@@ -1728,7 +1996,7 @@ TEST(Matcher, HandlesNullQualTypes) {
   // We don't really care whether this matcher succeeds; we're testing that
   // it completes without crashing.
   EXPECT_TRUE(matches(
-    "struct A { };"
+      "struct A { };"
       "template <typename T>"
       "void f(T t) {"
       "  T local_t(t /* this becomes a null QualType in the AST */);"
@@ -1736,52 +2004,45 @@ TEST(Matcher, HandlesNullQualTypes) {
       "void g() {"
       "  f(0);"
       "}",
-    expr(hasType(TypeMatcher(
-      anyOf(
-        TypeMatcher(hasDeclaration(anything())),
-        pointsTo(AnyType),
-        references(AnyType)
-        // Other QualType matchers should go here.
-      ))))));
+      expr(hasType(TypeMatcher(anyOf(TypeMatcher(hasDeclaration(anything())),
+                                     pointsTo(AnyType), references(AnyType)
+                                     // Other QualType matchers should go here.
+                                     ))))));
 }
 
-TEST(ObjCIvarRefExprMatcher, IvarExpr) {
+TEST_P(ASTMatchersTest, ObjCIvarRefExpr) {
   StringRef ObjCString =
       "@interface A @end "
       "@implementation A { A *x; } - (void) func { x = 0; } @end";
   EXPECT_TRUE(matchesObjC(ObjCString, objcIvarRefExpr()));
-  EXPECT_TRUE(matchesObjC(ObjCString, objcIvarRefExpr(
-        hasDeclaration(namedDecl(hasName("x"))))));
-  EXPECT_FALSE(matchesObjC(ObjCString, objcIvarRefExpr(
-        hasDeclaration(namedDecl(hasName("y"))))));
+  EXPECT_TRUE(matchesObjC(
+      ObjCString, objcIvarRefExpr(hasDeclaration(namedDecl(hasName("x"))))));
+  EXPECT_FALSE(matchesObjC(
+      ObjCString, objcIvarRefExpr(hasDeclaration(namedDecl(hasName("y"))))));
 }
 
-TEST(BlockExprMatcher, BlockExpr) {
+TEST_P(ASTMatchersTest, BlockExpr) {
   EXPECT_TRUE(matchesObjC("void f() { ^{}(); }", blockExpr()));
 }
 
-TEST(StatementCountIs, FindsNoStatementsInAnEmptyCompoundStatement) {
-  EXPECT_TRUE(matches("void f() { }",
-                      compoundStmt(statementCountIs(0))));
-  EXPECT_TRUE(notMatches("void f() {}",
-                         compoundStmt(statementCountIs(1))));
+TEST_P(ASTMatchersTest,
+       StatementCountIs_FindsNoStatementsInAnEmptyCompoundStatement) {
+  EXPECT_TRUE(matches("void f() { }", compoundStmt(statementCountIs(0))));
+  EXPECT_TRUE(notMatches("void f() {}", compoundStmt(statementCountIs(1))));
 }
 
-TEST(StatementCountIs, AppearsToMatchOnlyOneCount) {
-  EXPECT_TRUE(matches("void f() { 1; }",
-                      compoundStmt(statementCountIs(1))));
-  EXPECT_TRUE(notMatches("void f() { 1; }",
-                         compoundStmt(statementCountIs(0))));
-  EXPECT_TRUE(notMatches("void f() { 1; }",
-                         compoundStmt(statementCountIs(2))));
+TEST_P(ASTMatchersTest, StatementCountIs_AppearsToMatchOnlyOneCount) {
+  EXPECT_TRUE(matches("void f() { 1; }", compoundStmt(statementCountIs(1))));
+  EXPECT_TRUE(notMatches("void f() { 1; }", compoundStmt(statementCountIs(0))));
+  EXPECT_TRUE(notMatches("void f() { 1; }", compoundStmt(statementCountIs(2))));
 }
 
-TEST(StatementCountIs, WorksWithMultipleStatements) {
-  EXPECT_TRUE(matches("void f() { 1; 2; 3; }",
-                      compoundStmt(statementCountIs(3))));
+TEST_P(ASTMatchersTest, StatementCountIs_WorksWithMultipleStatements) {
+  EXPECT_TRUE(
+      matches("void f() { 1; 2; 3; }", compoundStmt(statementCountIs(3))));
 }
 
-TEST(StatementCountIs, WorksWithNestedCompoundStatements) {
+TEST_P(ASTMatchersTest, StatementCountIs_WorksWithNestedCompoundStatements) {
   EXPECT_TRUE(matches("void f() { { 1; } { 1; 2; 3; 4; } }",
                       compoundStmt(statementCountIs(1))));
   EXPECT_TRUE(matches("void f() { { 1; } { 1; 2; 3; 4; } }",
@@ -1792,45 +2053,62 @@ TEST(StatementCountIs, WorksWithNestedCompoundStatements) {
                       compoundStmt(statementCountIs(4))));
 }
 
-TEST(Member, WorksInSimplestCase) {
+TEST_P(ASTMatchersTest, Member_WorksInSimplestCase) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `member()` that does not depend on C++.
+    return;
+  }
   EXPECT_TRUE(matches("struct { int first; } s; int i(s.first);",
                       memberExpr(member(hasName("first")))));
 }
 
-TEST(Member, DoesNotMatchTheBaseExpression) {
+TEST_P(ASTMatchersTest, Member_DoesNotMatchTheBaseExpression) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `member()` that does not depend on C++.
+    return;
+  }
+
   // Don't pick out the wrong part of the member expression, this should
   // be checking the member (name) only.
   EXPECT_TRUE(notMatches("struct { int i; } first; int i(first.i);",
                          memberExpr(member(hasName("first")))));
 }
 
-TEST(Member, MatchesInMemberFunctionCall) {
+TEST_P(ASTMatchersTest, Member_MatchesInMemberFunctionCall) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("void f() {"
-                        "  struct { void first() {}; } s;"
-                        "  s.first();"
-                        "};",
+                      "  struct { void first() {}; } s;"
+                      "  s.first();"
+                      "};",
                       memberExpr(member(hasName("first")))));
 }
 
-TEST(Member, MatchesMember) {
-  EXPECT_TRUE(matches(
-    "struct A { int i; }; void f() { A a; a.i = 2; }",
-    memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
-  EXPECT_TRUE(notMatches(
-    "struct A { float f; }; void f() { A a; a.f = 2.0f; }",
-    memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
+TEST_P(ASTMatchersTest, FieldDecl) {
+  EXPECT_TRUE(
+      matches("struct A { int i; }; void f() { struct A a; a.i = 2; }",
+              memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
+  EXPECT_TRUE(
+      notMatches("struct A { float f; }; void f() { struct A a; a.f = 2.0f; }",
+                 memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
 }
 
-TEST(Member, BitFields) {
-  EXPECT_TRUE(matches("class C { int a : 2; int b; };",
+TEST_P(ASTMatchersTest, IsBitField) {
+  EXPECT_TRUE(matches("struct C { int a : 2; int b; };",
                       fieldDecl(isBitField(), hasName("a"))));
-  EXPECT_TRUE(notMatches("class C { int a : 2; int b; };",
+  EXPECT_TRUE(notMatches("struct C { int a : 2; int b; };",
                          fieldDecl(isBitField(), hasName("b"))));
-  EXPECT_TRUE(matches("class C { int a : 2; int b : 4; };",
+  EXPECT_TRUE(matches("struct C { int a : 2; int b : 4; };",
                       fieldDecl(isBitField(), hasBitWidth(2), hasName("a"))));
 }
 
-TEST(Member, InClassInitializer) {
+TEST_P(ASTMatchersTest, HasInClassInitializer) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
       matches("class C { int a = 2; int b; };",
               fieldDecl(hasInClassInitializer(integerLiteral(equals(2))),
@@ -1840,27 +2118,31 @@ TEST(Member, InClassInitializer) {
                  fieldDecl(hasInClassInitializer(anything()), hasName("b"))));
 }
 
-TEST(Member, UnderstandsAccess) {
-  EXPECT_TRUE(matches(
-    "struct A { int i; };", fieldDecl(isPublic(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "struct A { int i; };", fieldDecl(isProtected(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "struct A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
+TEST_P(ASTMatchersTest, IsPublic_IsProtected_IsPrivate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
 
-  EXPECT_TRUE(notMatches(
-    "class A { int i; };", fieldDecl(isPublic(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "class A { int i; };", fieldDecl(isProtected(), hasName("i"))));
-  EXPECT_TRUE(matches(
-    "class A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
+  EXPECT_TRUE(
+      matches("struct A { int i; };", fieldDecl(isPublic(), hasName("i"))));
+  EXPECT_TRUE(notMatches("struct A { int i; };",
+                         fieldDecl(isProtected(), hasName("i"))));
+  EXPECT_TRUE(
+      notMatches("struct A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
 
-  EXPECT_TRUE(notMatches(
-    "class A { protected: int i; };", fieldDecl(isPublic(), hasName("i"))));
+  EXPECT_TRUE(
+      notMatches("class A { int i; };", fieldDecl(isPublic(), hasName("i"))));
+  EXPECT_TRUE(notMatches("class A { int i; };",
+                         fieldDecl(isProtected(), hasName("i"))));
+  EXPECT_TRUE(
+      matches("class A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
+
+  EXPECT_TRUE(notMatches("class A { protected: int i; };",
+                         fieldDecl(isPublic(), hasName("i"))));
   EXPECT_TRUE(matches("class A { protected: int i; };",
                       fieldDecl(isProtected(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "class A { protected: int i; };", fieldDecl(isPrivate(), hasName("i"))));
+  EXPECT_TRUE(notMatches("class A { protected: int i; };",
+                         fieldDecl(isPrivate(), hasName("i"))));
 
   // Non-member decls have the AccessSpecifier AS_none and thus aren't matched.
   EXPECT_TRUE(notMatches("int i;", varDecl(isPublic(), hasName("i"))));
@@ -1868,50 +2150,76 @@ TEST(Member, UnderstandsAccess) {
   EXPECT_TRUE(notMatches("int i;", varDecl(isPrivate(), hasName("i"))));
 }
 
-TEST(hasDynamicExceptionSpec, MatchesDynamicExceptionSpecifications) {
-  EXPECT_TRUE(notMatches("void f();", functionDecl(hasDynamicExceptionSpec())));
+TEST_P(ASTMatchersTest,
+       HasDynamicExceptionSpec_MatchesDynamicExceptionSpecifications) {
+  if (!GetParam().supportsCXXDynamicExceptionSpecification()) {
+    return;
+  }
+
+  EXPECT_TRUE(notMatches("void f();", functionDecl(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(
+      matches("void j() throw();", functionDecl(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(
+      matches("void k() throw(int);", functionDecl(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(
+      matches("void l() throw(...);", functionDecl(hasDynamicExceptionSpec())));
+
+  EXPECT_TRUE(
+      notMatches("void f();", functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(matches("void j() throw();",
+                      functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(matches("void k() throw(int);",
+                      functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(matches("void l() throw(...);",
+                      functionProtoType(hasDynamicExceptionSpec())));
+}
+
+TEST_P(ASTMatchersTest,
+       HasDynamicExceptionSpec_MatchesDynamicExceptionSpecifications_CXX11) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("void g() noexcept;",
                          functionDecl(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void h() noexcept(true);",
                          functionDecl(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void i() noexcept(false);",
                          functionDecl(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void j() throw();", functionDecl(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void k() throw(int);", functionDecl(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void l() throw(...);", functionDecl(hasDynamicExceptionSpec())));
 
-  EXPECT_TRUE(notMatches("void f();", functionProtoType(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void g() noexcept;",
                          functionProtoType(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void h() noexcept(true);",
                          functionProtoType(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void i() noexcept(false);",
                          functionProtoType(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void j() throw();", functionProtoType(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void k() throw(int);", functionProtoType(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void l() throw(...);", functionProtoType(hasDynamicExceptionSpec())));
 }
 
-TEST(HasObjectExpression, DoesNotMatchMember) {
+TEST_P(ASTMatchersTest, HasObjectExpression_DoesNotMatchMember) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches(
-    "class X {}; struct Z { X m; }; void f(Z z) { z.m; }",
-    memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
+      "class X {}; struct Z { X m; }; void f(Z z) { z.m; }",
+      memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
 }
 
-TEST(HasObjectExpression, MatchesBaseOfVariable) {
+TEST_P(ASTMatchersTest, HasObjectExpression_MatchesBaseOfVariable) {
   EXPECT_TRUE(matches(
-    "struct X { int m; }; void f(X x) { x.m; }",
-    memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
-  EXPECT_TRUE(matches(
-    "struct X { int m; }; void f(X* x) { x->m; }",
-    memberExpr(hasObjectExpression(
-      hasType(pointsTo(recordDecl(hasName("X"))))))));
+      "struct X { int m; }; void f(struct X x) { x.m; }",
+      memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
+  EXPECT_TRUE(matches("struct X { int m; }; void f(struct X* x) { x->m; }",
+                      memberExpr(hasObjectExpression(
+                          hasType(pointsTo(recordDecl(hasName("X"))))))));
+}
+
+TEST_P(ASTMatchersTest, HasObjectExpression_MatchesBaseOfVariable_CXX) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("template <class T> struct X { void f() { T t; t.m; } };",
                       cxxDependentScopeMemberExpr(hasObjectExpression(
                           declRefExpr(to(namedDecl(hasName("t"))))))));
@@ -1921,10 +2229,22 @@ TEST(HasObjectExpression, MatchesBaseOfVariable) {
                   declRefExpr(to(namedDecl(hasName("t"))))))));
 }
 
-TEST(HasObjectExpression, MatchesBaseOfMemberFunc) {
+TEST_P(ASTMatchersTest, HasObjectExpression_MatchesBaseOfMemberFunc) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches(
       "struct X { void f(); }; void g(X x) { x.f(); }",
       memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
+}
+
+TEST_P(ASTMatchersTest, HasObjectExpression_MatchesBaseOfMemberFunc_Template) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("struct X { template <class T> void f(); };"
                       "template <class T> void g(X x) { x.f<T>(); }",
                       unresolvedMemberExpr(hasObjectExpression(
@@ -1934,98 +2254,101 @@ TEST(HasObjectExpression, MatchesBaseOfMemberFunc) {
                           declRefExpr(to(namedDecl(hasName("t"))))))));
 }
 
-TEST(HasObjectExpression,
-     MatchesObjectExpressionOfImplicitlyFormedMemberExpression) {
-  EXPECT_TRUE(matches(
-    "class X {}; struct S { X m; void f() { this->m; } };",
-    memberExpr(hasObjectExpression(
-      hasType(pointsTo(recordDecl(hasName("S"))))))));
-  EXPECT_TRUE(matches(
-    "class X {}; struct S { X m; void f() { m; } };",
-    memberExpr(hasObjectExpression(
-      hasType(pointsTo(recordDecl(hasName("S"))))))));
+TEST_P(ASTMatchersTest, HasObjectExpression_ImplicitlyFormedMemberExpression) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("class X {}; struct S { X m; void f() { this->m; } };",
+                      memberExpr(hasObjectExpression(
+                          hasType(pointsTo(recordDecl(hasName("S"))))))));
+  EXPECT_TRUE(matches("class X {}; struct S { X m; void f() { m; } };",
+                      memberExpr(hasObjectExpression(
+                          hasType(pointsTo(recordDecl(hasName("S"))))))));
 }
 
-TEST(Field, DoesNotMatchNonFieldMembers) {
+TEST_P(ASTMatchersTest, FieldDecl_DoesNotMatchNonFieldMembers) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("class X { void m(); };", fieldDecl(hasName("m"))));
   EXPECT_TRUE(notMatches("class X { class m {}; };", fieldDecl(hasName("m"))));
   EXPECT_TRUE(notMatches("class X { enum { m }; };", fieldDecl(hasName("m"))));
   EXPECT_TRUE(notMatches("class X { enum m {}; };", fieldDecl(hasName("m"))));
 }
 
-TEST(Field, MatchesField) {
-  EXPECT_TRUE(matches("class X { int m; };", fieldDecl(hasName("m"))));
+TEST_P(ASTMatchersTest, FieldDecl_MatchesField) {
+  EXPECT_TRUE(matches("struct X { int m; };", fieldDecl(hasName("m"))));
 }
 
-TEST(IsVolatileQualified, QualifiersMatch) {
-  EXPECT_TRUE(matches("volatile int i = 42;",
-                      varDecl(hasType(isVolatileQualified()))));
-  EXPECT_TRUE(notMatches("volatile int *i;",
-                         varDecl(hasType(isVolatileQualified()))));
+TEST_P(ASTMatchersTest, IsVolatileQualified) {
+  EXPECT_TRUE(
+      matches("volatile int i = 42;", varDecl(hasType(isVolatileQualified()))));
+  EXPECT_TRUE(
+      notMatches("volatile int *i;", varDecl(hasType(isVolatileQualified()))));
   EXPECT_TRUE(matches("typedef volatile int v_int; v_int i = 42;",
                       varDecl(hasType(isVolatileQualified()))));
 }
 
-TEST(IsConstQualified, MatchesConstInt) {
-  EXPECT_TRUE(matches("const int i = 42;",
-                      varDecl(hasType(isConstQualified()))));
+TEST_P(ASTMatchersTest, IsConstQualified_MatchesConstInt) {
+  EXPECT_TRUE(
+      matches("const int i = 42;", varDecl(hasType(isConstQualified()))));
 }
 
-TEST(IsConstQualified, MatchesConstPointer) {
-  EXPECT_TRUE(matches("int i = 42; int* const p(&i);",
+TEST_P(ASTMatchersTest, IsConstQualified_MatchesConstPointer) {
+  EXPECT_TRUE(matches("int i = 42; int* const p = &i;",
                       varDecl(hasType(isConstQualified()))));
 }
 
-TEST(IsConstQualified, MatchesThroughTypedef) {
+TEST_P(ASTMatchersTest, IsConstQualified_MatchesThroughTypedef) {
   EXPECT_TRUE(matches("typedef const int const_int; const_int i = 42;",
                       varDecl(hasType(isConstQualified()))));
-  EXPECT_TRUE(matches("typedef int* int_ptr; const int_ptr p(0);",
+  EXPECT_TRUE(matches("typedef int* int_ptr; const int_ptr p = ((int*)0);",
                       varDecl(hasType(isConstQualified()))));
 }
 
-TEST(IsConstQualified, DoesNotMatchInappropriately) {
+TEST_P(ASTMatchersTest, IsConstQualified_DoesNotMatchInappropriately) {
   EXPECT_TRUE(notMatches("typedef int nonconst_int; nonconst_int i = 42;",
                          varDecl(hasType(isConstQualified()))));
-  EXPECT_TRUE(notMatches("int const* p;",
-                         varDecl(hasType(isConstQualified()))));
+  EXPECT_TRUE(
+      notMatches("int const* p;", varDecl(hasType(isConstQualified()))));
 }
 
-TEST(DeclCount, DeclCountIsCorrect) {
-  EXPECT_TRUE(matches("void f() {int i,j;}",
-                      declStmt(declCountIs(2))));
-  EXPECT_TRUE(notMatches("void f() {int i,j; int k;}",
-                         declStmt(declCountIs(3))));
-  EXPECT_TRUE(notMatches("void f() {int i,j, k, l;}",
-                         declStmt(declCountIs(3))));
+TEST_P(ASTMatchersTest, DeclCountIs_DeclCountIsCorrect) {
+  EXPECT_TRUE(matches("void f() {int i,j;}", declStmt(declCountIs(2))));
+  EXPECT_TRUE(
+      notMatches("void f() {int i,j; int k;}", declStmt(declCountIs(3))));
+  EXPECT_TRUE(
+      notMatches("void f() {int i,j, k, l;}", declStmt(declCountIs(3))));
 }
 
-
-TEST(EachOf, TriggersForEachMatch) {
+TEST_P(ASTMatchersTest, EachOf_TriggersForEachMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { int a; int b; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v")))),
-    std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v", 2)));
+      "class A { int a; int b; };",
+      recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                        has(fieldDecl(hasName("b")).bind("v")))),
+      std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v", 2)));
 }
 
-TEST(EachOf, BehavesLikeAnyOfUnlessBothMatch) {
+TEST_P(ASTMatchersTest, EachOf_BehavesLikeAnyOfUnlessBothMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { int a; int c; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v")))),
-    std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v", 1)));
+      "struct A { int a; int c; };",
+      recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                        has(fieldDecl(hasName("b")).bind("v")))),
+      std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v", 1)));
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { int c; int b; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v")))),
-    std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v", 1)));
-  EXPECT_TRUE(notMatches(
-    "class A { int c; int d; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v"))))));
+      "struct A { int c; int b; };",
+      recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                        has(fieldDecl(hasName("b")).bind("v")))),
+      std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v", 1)));
+  EXPECT_TRUE(
+      notMatches("struct A { int c; int d; };",
+                 recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                                   has(fieldDecl(hasName("b")).bind("v"))))));
 }
 
-TEST(Optionally, SubmatchersDoNotMatch) {
+TEST_P(ASTMatchersTest, Optionally_SubmatchersDoNotMatch) {
   EXPECT_TRUE(matchAndVerifyResultFalse(
       "class A { int a; int b; };",
       recordDecl(optionally(has(fieldDecl(hasName("c")).bind("c")))),
@@ -2033,7 +2356,7 @@ TEST(Optionally, SubmatchersDoNotMatch) {
 }
 
 // Regression test.
-TEST(Optionally, SubmatchersDoNotMatchButPreserveBindings) {
+TEST_P(ASTMatchersTest, Optionally_SubmatchersDoNotMatchButPreserveBindings) {
   StringRef Code = "class A { int a; int b; };";
   auto Matcher = recordDecl(decl().bind("decl"),
                             optionally(has(fieldDecl(hasName("c")).bind("v"))));
@@ -2045,40 +2368,56 @@ TEST(Optionally, SubmatchersDoNotMatchButPreserveBindings) {
       Code, Matcher, std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v")));
 }
 
-TEST(Optionally, SubmatchersMatch) {
+TEST_P(ASTMatchersTest, Optionally_SubmatchersMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
       "class A { int a; int c; };",
       recordDecl(optionally(has(fieldDecl(hasName("a")).bind("v")))),
       std::make_unique<VerifyIdIsBoundTo<FieldDecl>>("v")));
 }
 
-TEST(IsTemplateInstantiation, MatchesImplicitClassTemplateInstantiation) {
+TEST_P(ASTMatchersTest,
+       IsTemplateInstantiation_MatchesImplicitClassTemplateInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   // Make sure that we can both match the class by name (::X) and by the type
   // the template was instantiated with (via a field).
 
-  EXPECT_TRUE(matches(
-    "template <typename T> class X {}; class A {}; X<A> x;",
-    cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
+  EXPECT_TRUE(
+      matches("template <typename T> class X {}; class A {}; X<A> x;",
+              cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
 
   EXPECT_TRUE(matches(
-    "template <typename T> class X { T t; }; class A {}; X<A> x;",
-    cxxRecordDecl(isTemplateInstantiation(), hasDescendant(
-      fieldDecl(hasType(recordDecl(hasName("A"))))))));
+      "template <typename T> class X { T t; }; class A {}; X<A> x;",
+      cxxRecordDecl(
+          isTemplateInstantiation(),
+          hasDescendant(fieldDecl(hasType(recordDecl(hasName("A"))))))));
 }
 
-TEST(IsTemplateInstantiation, MatchesImplicitFunctionTemplateInstantiation) {
+TEST_P(ASTMatchersTest,
+       IsTemplateInstantiation_MatchesImplicitFunctionTemplateInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches(
-    "template <typename T> void f(T t) {} class A {}; void g() { f(A()); }",
-    functionDecl(hasParameter(0, hasType(recordDecl(hasName("A")))),
-                 isTemplateInstantiation())));
+      "template <typename T> void f(T t) {} class A {}; void g() { f(A()); }",
+      functionDecl(hasParameter(0, hasType(recordDecl(hasName("A")))),
+                   isTemplateInstantiation())));
 }
 
-TEST(IsTemplateInstantiation, MatchesExplicitClassTemplateInstantiation) {
-  EXPECT_TRUE(matches(
-    "template <typename T> class X { T t; }; class A {};"
-      "template class X<A>;",
-    cxxRecordDecl(isTemplateInstantiation(), hasDescendant(
-      fieldDecl(hasType(recordDecl(hasName("A"))))))));
+TEST_P(ASTMatchersTest,
+       IsTemplateInstantiation_MatchesExplicitClassTemplateInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("template <typename T> class X { T t; }; class A {};"
+                      "template class X<A>;",
+                      cxxRecordDecl(isTemplateInstantiation(),
+                                    hasDescendant(fieldDecl(
+                                        hasType(recordDecl(hasName("A"))))))));
 
   // Make sure that we match the instantiation instead of the template
   // definition by checking whether the member function is present.
@@ -2089,199 +2428,326 @@ TEST(IsTemplateInstantiation, MatchesExplicitClassTemplateInstantiation) {
                             unless(hasDescendant(varDecl(hasName("t")))))));
 }
 
-TEST(IsTemplateInstantiation,
-     MatchesInstantiationOfPartiallySpecializedClassTemplate) {
-  EXPECT_TRUE(matches(
-    "template  class X {};"
-      "template  class X {}; class A {}; X x;",
-    cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
+TEST_P(
+    ASTMatchersTest,
+    IsTemplateInstantiation_MatchesInstantiationOfPartiallySpecializedClassTemplate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      matches("template  class X {};"
+              "template  class X {}; class A {}; X x;",
+              cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
 }
 
-TEST(IsTemplateInstantiation,
-     MatchesInstantiationOfClassTemplateNestedInNonTemplate) {
-  EXPECT_TRUE(matches(
-    "class A {};"
-      "class X {"
-      "  template  class Y { U u; };"
-      "  Y y;"
-      "};",
-    cxxRecordDecl(hasName("::X::Y"), isTemplateInstantiation())));
+TEST_P(
+    ASTMatchersTest,
+    IsTemplateInstantiation_MatchesInstantiationOfClassTemplateNestedInNonTemplate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      matches("class A {};"
+              "class X {"
+              "  template  class Y { U u; };"
+              "  Y y;"
+              "};",
+              cxxRecordDecl(hasName("::X::Y"), isTemplateInstantiation())));
 }
 
-TEST(IsTemplateInstantiation, DoesNotMatchInstantiationsInsideOfInstantiation) {
+TEST_P(
+    ASTMatchersTest,
+    IsTemplateInstantiation_DoesNotMatchInstantiationsInsideOfInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   // FIXME: Figure out whether this makes sense. It doesn't affect the
   // normal use case as long as the uppermost instantiation always is marked
   // as template instantiation, but it might be confusing as a predicate.
   EXPECT_TRUE(matches(
-    "class A {};"
+      "class A {};"
       "template  class X {"
       "  template  class Y { U u; };"
       "  Y y;"
       "}; X x;",
-    cxxRecordDecl(hasName("::X::Y"), unless(isTemplateInstantiation()))));
+      cxxRecordDecl(hasName("::X::Y"), unless(isTemplateInstantiation()))));
 }
 
-TEST(IsTemplateInstantiation, DoesNotMatchExplicitClassTemplateSpecialization) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {}; class A {};"
-      "template <> class X {}; X x;",
-    cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
+TEST_P(
+    ASTMatchersTest,
+    IsTemplateInstantiation_DoesNotMatchExplicitClassTemplateSpecialization) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      notMatches("template  class X {}; class A {};"
+                 "template <> class X {}; X x;",
+                 cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
 }
 
-TEST(IsTemplateInstantiation, DoesNotMatchNonTemplate) {
-  EXPECT_TRUE(notMatches(
-    "class A {}; class Y { A a; };",
-    cxxRecordDecl(isTemplateInstantiation())));
+TEST_P(ASTMatchersTest, IsTemplateInstantiation_DoesNotMatchNonTemplate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(notMatches("class A {}; class Y { A a; };",
+                         cxxRecordDecl(isTemplateInstantiation())));
 }
 
-TEST(IsInstantiated, MatchesInstantiation) {
+TEST_P(ASTMatchersTest, IsInstantiated_MatchesInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
-    matches("template class A { T i; }; class Y { A a; };",
-            cxxRecordDecl(isInstantiated())));
+      matches("template class A { T i; }; class Y { A a; };",
+              cxxRecordDecl(isInstantiated())));
 }
 
-TEST(IsInstantiated, NotMatchesDefinition) {
+TEST_P(ASTMatchersTest, IsInstantiated_NotMatchesDefinition) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template class A { T i; };",
                          cxxRecordDecl(isInstantiated())));
 }
 
-TEST(IsInTemplateInstantiation, MatchesInstantiationStmt) {
+TEST_P(ASTMatchersTest, IsInTemplateInstantiation_MatchesInstantiationStmt) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("template struct A { A() { T i; } };"
-                        "class Y { A a; }; Y y;",
+                      "class Y { A a; }; Y y;",
                       declStmt(isInTemplateInstantiation())));
 }
 
-TEST(IsInTemplateInstantiation, NotMatchesDefinitionStmt) {
+TEST_P(ASTMatchersTest, IsInTemplateInstantiation_NotMatchesDefinitionStmt) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template struct A { void x() { T i; } };",
                          declStmt(isInTemplateInstantiation())));
 }
 
-TEST(IsInstantiated, MatchesFunctionInstantiation) {
+TEST_P(ASTMatchersTest, IsInstantiated_MatchesFunctionInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
-    matches("template void A(T t) { T i; } void x() { A(0); }",
-            functionDecl(isInstantiated())));
+      matches("template void A(T t) { T i; } void x() { A(0); }",
+              functionDecl(isInstantiated())));
 }
 
-TEST(IsInstantiated, NotMatchesFunctionDefinition) {
+TEST_P(ASTMatchersTest, IsInstantiated_NotMatchesFunctionDefinition) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template void A(T t) { T i; }",
                          varDecl(isInstantiated())));
 }
 
-TEST(IsInTemplateInstantiation, MatchesFunctionInstantiationStmt) {
+TEST_P(ASTMatchersTest,
+       IsInTemplateInstantiation_MatchesFunctionInstantiationStmt) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(
-    matches("template void A(T t) { T i; } void x() { A(0); }",
-            declStmt(isInTemplateInstantiation())));
+      matches("template void A(T t) { T i; } void x() { A(0); }",
+              declStmt(isInTemplateInstantiation())));
 }
 
-TEST(IsInTemplateInstantiation, NotMatchesFunctionDefinitionStmt) {
+TEST_P(ASTMatchersTest,
+       IsInTemplateInstantiation_NotMatchesFunctionDefinitionStmt) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template void A(T t) { T i; }",
                          declStmt(isInTemplateInstantiation())));
 }
 
-TEST(IsInTemplateInstantiation, Sharing) {
+TEST_P(ASTMatchersTest, IsInTemplateInstantiation_Sharing) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   auto Matcher = binaryOperator(unless(isInTemplateInstantiation()));
   // FIXME: Node sharing is an implementation detail, exposing it is ugly
   // and makes the matcher behave in non-obvious ways.
   EXPECT_TRUE(notMatches(
-    "int j; template void A(T t) { j += 42; } void x() { A(0); }",
-    Matcher));
+      "int j; template void A(T t) { j += 42; } void x() { A(0); }",
+      Matcher));
   EXPECT_TRUE(matches(
-    "int j; template void A(T t) { j += t; } void x() { A(0); }",
-    Matcher));
+      "int j; template void A(T t) { j += t; } void x() { A(0); }",
+      Matcher));
 }
 
-TEST(IsInstantiationDependent, MatchesNonValueTypeDependent) {
+TEST_P(ASTMatchersTest, IsInstantiationDependent_MatchesNonValueTypeDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches(
       "template void f() { (void) sizeof(sizeof(T() + T())); }",
       expr(isInstantiationDependent())));
 }
 
-TEST(IsInstantiationDependent, MatchesValueDependent) {
+TEST_P(ASTMatchersTest, IsInstantiationDependent_MatchesValueDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("template int f() { return T; }",
                       expr(isInstantiationDependent())));
 }
 
-TEST(IsInstantiationDependent, MatchesTypeDependent) {
+TEST_P(ASTMatchersTest, IsInstantiationDependent_MatchesTypeDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("template T f() { return T(); }",
                       expr(isInstantiationDependent())));
 }
 
-TEST(IsTypeDependent, MatchesTypeDependent) {
+TEST_P(ASTMatchersTest, IsTypeDependent_MatchesTypeDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("template T f() { return T(); }",
                       expr(isTypeDependent())));
 }
 
-TEST(IsTypeDependent, NotMatchesValueDependent) {
+TEST_P(ASTMatchersTest, IsTypeDependent_NotMatchesValueDependent) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("template int f() { return T; }",
                          expr(isTypeDependent())));
 }
 
-TEST(IsValueDependent, MatchesValueDependent) {
+TEST_P(ASTMatchersTest, IsValueDependent_MatchesValueDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("template int f() { return T; }",
                       expr(isValueDependent())));
 }
 
-TEST(IsValueDependent, MatchesTypeDependent) {
+TEST_P(ASTMatchersTest, IsValueDependent_MatchesTypeDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches("template T f() { return T(); }",
                       expr(isValueDependent())));
 }
 
-TEST(IsValueDependent, MatchesInstantiationDependent) {
+TEST_P(ASTMatchersTest, IsValueDependent_MatchesInstantiationDependent) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   EXPECT_TRUE(matches(
       "template void f() { (void) sizeof(sizeof(T() + T())); }",
       expr(isValueDependent())));
 }
 
-TEST(IsExplicitTemplateSpecialization,
-     DoesNotMatchPrimaryTemplate) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {};",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(notMatches(
-    "template  void f(T t);",
-    functionDecl(isExplicitTemplateSpecialization())));
+TEST_P(ASTMatchersTest,
+       IsExplicitTemplateSpecialization_DoesNotMatchPrimaryTemplate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(notMatches("template  class X {};",
+                         cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(notMatches("template  void f(T t);",
+                         functionDecl(isExplicitTemplateSpecialization())));
 }
 
-TEST(IsExplicitTemplateSpecialization,
-     DoesNotMatchExplicitTemplateInstantiations) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {};"
-      "template class X; extern template class X;",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(notMatches(
-    "template  void f(T t) {}"
-      "template void f(int t); extern template void f(long t);",
-    functionDecl(isExplicitTemplateSpecialization())));
+TEST_P(
+    ASTMatchersTest,
+    IsExplicitTemplateSpecialization_DoesNotMatchExplicitTemplateInstantiations) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      notMatches("template  class X {};"
+                 "template class X; extern template class X;",
+                 cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(
+      notMatches("template  void f(T t) {}"
+                 "template void f(int t); extern template void f(long t);",
+                 functionDecl(isExplicitTemplateSpecialization())));
 }
 
-TEST(IsExplicitTemplateSpecialization,
-     DoesNotMatchImplicitTemplateInstantiations) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {}; X x;",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(notMatches(
-    "template  void f(T t); void g() { f(10); }",
-    functionDecl(isExplicitTemplateSpecialization())));
+TEST_P(
+    ASTMatchersTest,
+    IsExplicitTemplateSpecialization_DoesNotMatchImplicitTemplateInstantiations) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(notMatches("template  class X {}; X x;",
+                         cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(
+      notMatches("template  void f(T t); void g() { f(10); }",
+                 functionDecl(isExplicitTemplateSpecialization())));
 }
 
-TEST(IsExplicitTemplateSpecialization,
-     MatchesExplicitTemplateSpecializations) {
-  EXPECT_TRUE(matches(
-    "template  class X {};"
-      "template<> class X {};",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(matches(
-    "template  void f(T t) {}"
-      "template<> void f(int t) {}",
-    functionDecl(isExplicitTemplateSpecialization())));
+TEST_P(
+    ASTMatchersTest,
+    IsExplicitTemplateSpecialization_MatchesExplicitTemplateSpecializations) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("template  class X {};"
+                      "template<> class X {};",
+                      cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(matches("template  void f(T t) {}"
+                      "template<> void f(int t) {}",
+                      functionDecl(isExplicitTemplateSpecialization())));
 }
 
-TEST(TypeMatching, MatchesNoReturn) {
+TEST_P(ASTMatchersTest, IsNoReturn) {
   EXPECT_TRUE(notMatches("void func();", functionDecl(isNoReturn())));
   EXPECT_TRUE(notMatches("void func() {}", functionDecl(isNoReturn())));
 
-  EXPECT_TRUE(notMatchesC("void func();", functionDecl(isNoReturn())));
-  EXPECT_TRUE(notMatchesC("void func() {}", functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("__attribute__((noreturn)) void func();",
+                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("__attribute__((noreturn)) void func() {}",
+                      functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(matches("_Noreturn void func();", functionDecl(isNoReturn())));
+  EXPECT_TRUE(matches("_Noreturn void func() {}", functionDecl(isNoReturn())));
+}
+
+TEST_P(ASTMatchersTest, IsNoReturn_CXX) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
 
   EXPECT_TRUE(
       notMatches("struct S { void func(); };", functionDecl(isNoReturn())));
@@ -2298,32 +2764,6 @@ TEST(TypeMatching, MatchesNoReturn) {
 
   // ---
 
-  EXPECT_TRUE(matches("[[noreturn]] void func();", functionDecl(isNoReturn())));
-  EXPECT_TRUE(
-      matches("[[noreturn]] void func() {}", functionDecl(isNoReturn())));
-
-  EXPECT_TRUE(matches("struct S { [[noreturn]] void func(); };",
-                      functionDecl(isNoReturn())));
-  EXPECT_TRUE(matches("struct S { [[noreturn]] void func() {} };",
-                      functionDecl(isNoReturn())));
-
-  EXPECT_TRUE(matches("struct S { [[noreturn]] static void func(); };",
-                      functionDecl(isNoReturn())));
-  EXPECT_TRUE(matches("struct S { [[noreturn]] static void func() {} };",
-                      functionDecl(isNoReturn())));
-
-  EXPECT_TRUE(
-      matches("struct S { [[noreturn]] S(); };", functionDecl(isNoReturn())));
-  EXPECT_TRUE(matches("struct S { [[noreturn]] S() {} };",
-                      functionDecl(isNoReturn())));
-
-  // ---
-
-  EXPECT_TRUE(matches("__attribute__((noreturn)) void func();",
-                      functionDecl(isNoReturn())));
-  EXPECT_TRUE(matches("__attribute__((noreturn)) void func() {}",
-                      functionDecl(isNoReturn())));
-
   EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) void func(); };",
                       functionDecl(isNoReturn())));
   EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) void func() {} };",
@@ -2340,33 +2780,62 @@ TEST(TypeMatching, MatchesNoReturn) {
                       functionDecl(isNoReturn())));
   EXPECT_TRUE(matches("struct S { __attribute__((noreturn)) S() {} };",
                       functionDecl(isNoReturn())));
+}
 
-  // ---
+TEST_P(ASTMatchersTest, IsNoReturn_CXX11Attribute) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("[[noreturn]] void func();", functionDecl(isNoReturn())));
+  EXPECT_TRUE(
+      matches("[[noreturn]] void func() {}", functionDecl(isNoReturn())));
 
-  EXPECT_TRUE(matchesC("__attribute__((noreturn)) void func();",
+  EXPECT_TRUE(matches("struct S { [[noreturn]] void func(); };",
                       functionDecl(isNoReturn())));
-  EXPECT_TRUE(matchesC("__attribute__((noreturn)) void func() {}",
+  EXPECT_TRUE(matches("struct S { [[noreturn]] void func() {} };",
                       functionDecl(isNoReturn())));
 
-  EXPECT_TRUE(matchesC("_Noreturn void func();",
+  EXPECT_TRUE(matches("struct S { [[noreturn]] static void func(); };",
                       functionDecl(isNoReturn())));
-  EXPECT_TRUE(matchesC("_Noreturn void func() {}",
+  EXPECT_TRUE(matches("struct S { [[noreturn]] static void func() {} };",
                       functionDecl(isNoReturn())));
+
+  EXPECT_TRUE(
+      matches("struct S { [[noreturn]] S(); };", functionDecl(isNoReturn())));
+  EXPECT_TRUE(
+      matches("struct S { [[noreturn]] S() {} };", functionDecl(isNoReturn())));
 }
 
-TEST(TypeMatching, MatchesBool) {
+TEST_P(ASTMatchersTest, BooleanType) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `booleanType()` that does not depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(matches("struct S { bool func(); };",
                       cxxMethodDecl(returns(booleanType()))));
   EXPECT_TRUE(notMatches("struct S { void func(); };",
                          cxxMethodDecl(returns(booleanType()))));
 }
 
-TEST(TypeMatching, MatchesVoid) {
+TEST_P(ASTMatchersTest, VoidType) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `voidType()` that does not depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(matches("struct S { void func(); };",
                       cxxMethodDecl(returns(voidType()))));
 }
 
-TEST(TypeMatching, MatchesRealFloats) {
+TEST_P(ASTMatchersTest, RealFloatingPointType) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `realFloatingPointType()` that does not depend on
+    // C++.
+    return;
+  }
+
   EXPECT_TRUE(matches("struct S { float func(); };",
                       cxxMethodDecl(returns(realFloatingPointType()))));
   EXPECT_TRUE(notMatches("struct S { int func(); };",
@@ -2375,61 +2844,66 @@ TEST(TypeMatching, MatchesRealFloats) {
                       cxxMethodDecl(returns(realFloatingPointType()))));
 }
 
-TEST(TypeMatching, MatchesArrayTypes) {
+TEST_P(ASTMatchersTest, ArrayType) {
   EXPECT_TRUE(matches("int a[] = {2,3};", arrayType()));
   EXPECT_TRUE(matches("int a[42];", arrayType()));
   EXPECT_TRUE(matches("void f(int b) { int a[b]; }", arrayType()));
 
-  EXPECT_TRUE(notMatches("struct A {}; A a[7];",
+  EXPECT_TRUE(notMatches("struct A {}; struct A a[7];",
                          arrayType(hasElementType(builtinType()))));
 
+  EXPECT_TRUE(matches("int const a[] = { 2, 3 };",
+                      qualType(arrayType(hasElementType(builtinType())))));
   EXPECT_TRUE(matches(
-    "int const a[] = { 2, 3 };",
-    qualType(arrayType(hasElementType(builtinType())))));
-  EXPECT_TRUE(matches(
-    "int const a[] = { 2, 3 };",
-    qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
-  EXPECT_TRUE(matches(
-    "typedef const int T; T x[] = { 1, 2 };",
-    qualType(isConstQualified(), arrayType())));
+      "int const a[] = { 2, 3 };",
+      qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
+  EXPECT_TRUE(matches("typedef const int T; T x[] = { 1, 2 };",
+                      qualType(isConstQualified(), arrayType())));
 
   EXPECT_TRUE(notMatches(
-    "int a[] = { 2, 3 };",
-    qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
+      "int a[] = { 2, 3 };",
+      qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
   EXPECT_TRUE(notMatches(
-    "int a[] = { 2, 3 };",
-    qualType(arrayType(hasElementType(isConstQualified(), builtinType())))));
-  EXPECT_TRUE(notMatches(
-    "int const a[] = { 2, 3 };",
-    qualType(arrayType(hasElementType(builtinType())),
-             unless(isConstQualified()))));
+      "int a[] = { 2, 3 };",
+      qualType(arrayType(hasElementType(isConstQualified(), builtinType())))));
+  EXPECT_TRUE(notMatches("int const a[] = { 2, 3 };",
+                         qualType(arrayType(hasElementType(builtinType())),
+                                  unless(isConstQualified()))));
 
-  EXPECT_TRUE(matches("int a[2];",
-                      constantArrayType(hasElementType(builtinType()))));
+  EXPECT_TRUE(
+      matches("int a[2];", constantArrayType(hasElementType(builtinType()))));
   EXPECT_TRUE(matches("const int a = 0;", qualType(isInteger())));
 }
 
-TEST(TypeMatching, DecayedType) {
-  EXPECT_TRUE(matches("void f(int i[]);", valueDecl(hasType(decayedType(hasDecayedType(pointerType()))))));
+TEST_P(ASTMatchersTest, DecayedType) {
+  EXPECT_TRUE(
+      matches("void f(int i[]);",
+              valueDecl(hasType(decayedType(hasDecayedType(pointerType()))))));
   EXPECT_TRUE(notMatches("int i[7];", decayedType()));
 }
 
-TEST(TypeMatching, MatchesComplexTypes) {
+TEST_P(ASTMatchersTest, ComplexType) {
   EXPECT_TRUE(matches("_Complex float f;", complexType()));
-  EXPECT_TRUE(matches(
-    "_Complex float f;",
-    complexType(hasElementType(builtinType()))));
-  EXPECT_TRUE(notMatches(
-    "_Complex float f;",
-    complexType(hasElementType(isInteger()))));
+  EXPECT_TRUE(
+      matches("_Complex float f;", complexType(hasElementType(builtinType()))));
+  EXPECT_TRUE(notMatches("_Complex float f;",
+                         complexType(hasElementType(isInteger()))));
 }
 
-TEST(NS, Anonymous) {
+TEST_P(ASTMatchersTest, IsAnonymous) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("namespace N {}", namespaceDecl(isAnonymous())));
   EXPECT_TRUE(matches("namespace {}", namespaceDecl(isAnonymous())));
 }
 
-TEST(DeclarationMatcher, InStdNamespace) {
+TEST_P(ASTMatchersTest, InStdNamespace) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(notMatches("class vector {};"
                          "namespace foo {"
                          "  class vector {};"
@@ -2445,6 +2919,13 @@ TEST(DeclarationMatcher, InStdNamespace) {
                       "  class vector {};"
                       "}",
                       cxxRecordDecl(hasName("vector"), isInStdNamespace())));
+}
+
+TEST_P(ASTMatchersTest, InStdNamespace_CXX11) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("namespace std {"
                       "  inline namespace __1 {"
                       "    class vector {};"
@@ -2480,63 +2961,74 @@ TEST(DeclarationMatcher, InStdNamespace) {
                                                       isInStdNamespace())))));
 }
 
-TEST(EqualsBoundNodeMatcher, QualType) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_QualType) {
   EXPECT_TRUE(matches(
-    "int i = 1;", varDecl(hasType(qualType().bind("type")),
-                          hasInitializer(ignoringParenImpCasts(
-                            hasType(qualType(equalsBoundNode("type"))))))));
+      "int i = 1;", varDecl(hasType(qualType().bind("type")),
+                            hasInitializer(ignoringParenImpCasts(
+                                hasType(qualType(equalsBoundNode("type"))))))));
   EXPECT_TRUE(notMatches("int i = 1.f;",
                          varDecl(hasType(qualType().bind("type")),
                                  hasInitializer(ignoringParenImpCasts(hasType(
-                                   qualType(equalsBoundNode("type"))))))));
+                                     qualType(equalsBoundNode("type"))))))));
 }
 
-TEST(EqualsBoundNodeMatcher, NonMatchingTypes) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_NonMatchingTypes) {
   EXPECT_TRUE(notMatches(
-    "int i = 1;", varDecl(namedDecl(hasName("i")).bind("name"),
-                          hasInitializer(ignoringParenImpCasts(
-                            hasType(qualType(equalsBoundNode("type"))))))));
+      "int i = 1;", varDecl(namedDecl(hasName("i")).bind("name"),
+                            hasInitializer(ignoringParenImpCasts(
+                                hasType(qualType(equalsBoundNode("type"))))))));
 }
 
-TEST(EqualsBoundNodeMatcher, Stmt) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_Stmt) {
   EXPECT_TRUE(
-    matches("void f() { if(true) {} }",
-            stmt(allOf(ifStmt().bind("if"),
-                       hasParent(stmt(has(stmt(equalsBoundNode("if")))))))));
+      matches("void f() { if(1) {} }",
+              stmt(allOf(ifStmt().bind("if"),
+                         hasParent(stmt(has(stmt(equalsBoundNode("if")))))))));
 
   EXPECT_TRUE(notMatches(
-    "void f() { if(true) { if (true) {} } }",
-    stmt(allOf(ifStmt().bind("if"), has(stmt(equalsBoundNode("if")))))));
+      "void f() { if(1) { if (1) {} } }",
+      stmt(allOf(ifStmt().bind("if"), has(stmt(equalsBoundNode("if")))))));
 }
 
-TEST(EqualsBoundNodeMatcher, Decl) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_Decl) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `equalsBoundNode()` for declarations that does not
+    // depend on C++.
+    return;
+  }
+
   EXPECT_TRUE(matches(
-    "class X { class Y {}; };",
-    decl(allOf(recordDecl(hasName("::X::Y")).bind("record"),
-               hasParent(decl(has(decl(equalsBoundNode("record")))))))));
+      "class X { class Y {}; };",
+      decl(allOf(recordDecl(hasName("::X::Y")).bind("record"),
+                 hasParent(decl(has(decl(equalsBoundNode("record")))))))));
 
   EXPECT_TRUE(notMatches("class X { class Y {}; };",
                          decl(allOf(recordDecl(hasName("::X")).bind("record"),
                                     has(decl(equalsBoundNode("record")))))));
 }
 
-TEST(EqualsBoundNodeMatcher, Type) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_Type) {
+  if (!GetParam().isCXX()) {
+    // FIXME: Add a test for `equalsBoundNode()` for types that does not depend
+    // on C++.
+    return;
+  }
   EXPECT_TRUE(matches(
-    "class X { int a; int b; };",
-    recordDecl(
-      has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
-      has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
+      "class X { int a; int b; };",
+      recordDecl(
+          has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
+          has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
 
   EXPECT_TRUE(notMatches(
-    "class X { int a; double b; };",
-    recordDecl(
-      has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
-      has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
+      "class X { int a; double b; };",
+      recordDecl(
+          has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
+          has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
 }
 
-TEST(EqualsBoundNodeMatcher, UsingForEachDescendant) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_UsingForEachDescendant) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "int f() {"
+      "int f() {"
       "  if (1) {"
       "    int i = 9;"
       "  }"
@@ -2546,68 +3038,85 @@ TEST(EqualsBoundNodeMatcher, UsingForEachDescendant) {
       "  }"
       "  return 0;"
       "}",
-    // Look for variable declarations within functions whose type is the same
-    // as the function return type.
-    functionDecl(returns(qualType().bind("type")),
-                 forEachDescendant(varDecl(hasType(
-                   qualType(equalsBoundNode("type")))).bind("decl"))),
-    // Only i and j should match, not k.
-    std::make_unique>("decl", 2)));
+      // Look for variable declarations within functions whose type is the same
+      // as the function return type.
+      functionDecl(
+          returns(qualType().bind("type")),
+          forEachDescendant(varDecl(hasType(qualType(equalsBoundNode("type"))))
+                                .bind("decl"))),
+      // Only i and j should match, not k.
+      std::make_unique>("decl", 2)));
 }
 
-TEST(EqualsBoundNodeMatcher, FiltersMatchedCombinations) {
+TEST_P(ASTMatchersTest, EqualsBoundNodeMatcher_FiltersMatchedCombinations) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "void f() {"
+      "void f() {"
       "  int x;"
       "  double d;"
       "  x = d + x - d + x;"
       "}",
-    functionDecl(
-      hasName("f"), forEachDescendant(varDecl().bind("d")),
-      forEachDescendant(declRefExpr(to(decl(equalsBoundNode("d")))))),
-    std::make_unique>("d", 5)));
+      functionDecl(
+          hasName("f"), forEachDescendant(varDecl().bind("d")),
+          forEachDescendant(declRefExpr(to(decl(equalsBoundNode("d")))))),
+      std::make_unique>("d", 5)));
 }
 
-TEST(EqualsBoundNodeMatcher, UnlessDescendantsOfAncestorsMatch) {
+TEST_P(ASTMatchersTest,
+       EqualsBoundNodeMatcher_UnlessDescendantsOfAncestorsMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "struct StringRef { int size() const; const char* data() const; };"
+      "struct StringRef { int size() const; const char* data() const; };"
       "void f(StringRef v) {"
       "  v.data();"
       "}",
-    cxxMemberCallExpr(
-      callee(cxxMethodDecl(hasName("data"))),
-      on(declRefExpr(to(
-        varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
-      unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
-        callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
-        on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
-      .bind("data"),
-    std::make_unique>("data", 1)));
+      cxxMemberCallExpr(
+          callee(cxxMethodDecl(hasName("data"))),
+          on(declRefExpr(to(
+              varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
+          unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
+              callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
+              on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
+          .bind("data"),
+      std::make_unique>("data", 1)));
 
   EXPECT_FALSE(matches(
-    "struct StringRef { int size() const; const char* data() const; };"
+      "struct StringRef { int size() const; const char* data() const; };"
       "void f(StringRef v) {"
       "  v.data();"
       "  v.size();"
       "}",
-    cxxMemberCallExpr(
-      callee(cxxMethodDecl(hasName("data"))),
-      on(declRefExpr(to(
-        varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
-      unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
-        callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
-        on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
-      .bind("data")));
+      cxxMemberCallExpr(
+          callee(cxxMethodDecl(hasName("data"))),
+          on(declRefExpr(to(
+              varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
+          unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
+              callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
+              on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
+          .bind("data")));
 }
 
-TEST(NullPointerConstants, Basic) {
+TEST_P(ASTMatchersTest, NullPointerConstant) {
   EXPECT_TRUE(matches("#define NULL ((void *)0)\n"
-                        "void *v1 = NULL;", expr(nullPointerConstant())));
-  EXPECT_TRUE(matches("void *v2 = nullptr;", expr(nullPointerConstant())));
-  EXPECT_TRUE(matches("void *v3 = __null;", expr(nullPointerConstant())));
+                      "void *v1 = NULL;",
+                      expr(nullPointerConstant())));
   EXPECT_TRUE(matches("char *cp = (char *)0;", expr(nullPointerConstant())));
   EXPECT_TRUE(matches("int *ip = 0;", expr(nullPointerConstant())));
   EXPECT_TRUE(matches("int i = 0;", expr(nullPointerConstant())));
+}
+
+TEST_P(ASTMatchersTest, NullPointerConstant_GNUNull) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("void *p = __null;", expr(nullPointerConstant())));
+}
+
+TEST_P(ASTMatchersTest, NullPointerConstant_GNUNullInTemplate) {
+  if (!GetParam().isCXX() || GetParam().hasDelayedTemplateParsing()) {
+    // FIXME: Fix this test to work with delayed template parsing.
+    return;
+  }
+
   const char kTest[] = R"(
     template 
     struct MyTemplate {
@@ -2618,44 +3127,64 @@ TEST(NullPointerConstants, Basic) {
   EXPECT_TRUE(matches(kTest, expr(nullPointerConstant())));
 }
 
-TEST(HasExternalFormalLinkage, Basic) {
-  EXPECT_TRUE(matches("int a = 0;", namedDecl(hasExternalFormalLinkage())));
-  EXPECT_TRUE(
-      notMatches("static int a = 0;", namedDecl(hasExternalFormalLinkage())));
+TEST_P(ASTMatchersTest, NullPointerConstant_CXX11Nullptr) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("void *p = nullptr;", expr(nullPointerConstant())));
+}
+
+TEST_P(ASTMatchersTest, HasExternalFormalLinkage) {
+  EXPECT_TRUE(matches("int a = 0;",
+                      namedDecl(hasName("a"), hasExternalFormalLinkage())));
+  EXPECT_TRUE(notMatches("static int a = 0;",
+                         namedDecl(hasName("a"), hasExternalFormalLinkage())));
   EXPECT_TRUE(notMatches("static void f(void) { int a = 0; }",
-                         namedDecl(hasExternalFormalLinkage())));
-  EXPECT_TRUE(matches("void f(void) { int a = 0; }",
-                      namedDecl(hasExternalFormalLinkage())));
+                         namedDecl(hasName("a"), hasExternalFormalLinkage())));
+  EXPECT_TRUE(notMatches("void f(void) { int a = 0; }",
+                         namedDecl(hasName("a"), hasExternalFormalLinkage())));
+}
+
+TEST_P(ASTMatchersTest, HasExternalFormalLinkage_CXX) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
 
-  // Despite having internal semantic linkage, the anonymous namespace member
-  // has external linkage because the member has a unique name in all
-  // translation units.
-  EXPECT_TRUE(matches("namespace { int a = 0; }",
-                      namedDecl(hasExternalFormalLinkage())));
+  EXPECT_TRUE(notMatches("namespace { int a = 0; }",
+                         namedDecl(hasName("a"), hasExternalFormalLinkage())));
 }
 
-TEST(HasDefaultArgument, Basic) {
-  EXPECT_TRUE(matches("void x(int val = 0) {}",
-                      parmVarDecl(hasDefaultArgument())));
-  EXPECT_TRUE(notMatches("void x(int val) {}",
-                      parmVarDecl(hasDefaultArgument())));
+TEST_P(ASTMatchersTest, HasDefaultArgument) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(
+      matches("void x(int val = 0) {}", parmVarDecl(hasDefaultArgument())));
+  EXPECT_TRUE(
+      notMatches("void x(int val) {}", parmVarDecl(hasDefaultArgument())));
 }
 
-TEST(IsAtPosition, Basic) {
+TEST_P(ASTMatchersTest, IsAtPosition) {
   EXPECT_TRUE(matches("void x(int a, int b) {}", parmVarDecl(isAtPosition(1))));
   EXPECT_TRUE(matches("void x(int a, int b) {}", parmVarDecl(isAtPosition(0))));
   EXPECT_TRUE(matches("void x(int a, int b) {}", parmVarDecl(isAtPosition(1))));
   EXPECT_TRUE(notMatches("void x(int val) {}", parmVarDecl(isAtPosition(1))));
 }
 
-TEST(IsAtPosition, FunctionDecl) {
+TEST_P(ASTMatchersTest, IsAtPosition_FunctionDecl) {
   EXPECT_TRUE(matches("void x(int a);", parmVarDecl(isAtPosition(0))));
   EXPECT_TRUE(matches("void x(int a, int b);", parmVarDecl(isAtPosition(0))));
   EXPECT_TRUE(matches("void x(int a, int b);", parmVarDecl(isAtPosition(1))));
   EXPECT_TRUE(notMatches("void x(int val);", parmVarDecl(isAtPosition(1))));
 }
 
-TEST(IsAtPosition, Lambda) {
+TEST_P(ASTMatchersTest, IsAtPosition_Lambda) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(
       matches("void x() { [](int a) {};  }", parmVarDecl(isAtPosition(0))));
   EXPECT_TRUE(matches("void x() { [](int a, int b) {}; }",
@@ -2666,7 +3195,7 @@ TEST(IsAtPosition, Lambda) {
       notMatches("void x() { [](int val) {}; }", parmVarDecl(isAtPosition(1))));
 }
 
-TEST(IsAtPosition, BlockDecl) {
+TEST_P(ASTMatchersTest, IsAtPosition_BlockDecl) {
   EXPECT_TRUE(matchesObjC(
       "void func()  { void (^my_block)(int arg) = ^void(int arg) {}; } ",
       parmVarDecl(isAtPosition(0))));
@@ -2680,69 +3209,115 @@ TEST(IsAtPosition, BlockDecl) {
       parmVarDecl(isAtPosition(1))));
 }
 
-TEST(IsArray, Basic) {
+TEST_P(ASTMatchersTest, IsArray) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("struct MyClass {}; MyClass *p1 = new MyClass[10];",
                       cxxNewExpr(isArray())));
 }
 
-TEST(HasArraySize, Basic) {
+TEST_P(ASTMatchersTest, HasArraySize) {
+  if (GetParam().Language != Lang_CXX03) {
+    // FIXME: Fix this test to work in all C++ language modes.
+    return;
+  }
+
   EXPECT_TRUE(matches("struct MyClass {}; MyClass *p1 = new MyClass[10];",
                       cxxNewExpr(hasArraySize(integerLiteral(equals(10))))));
 }
 
-TEST(HasDefinition, MatchesStructDefinition) {
-  EXPECT_TRUE(matches("struct x {};",
-                      cxxRecordDecl(hasDefinition())));
-  EXPECT_TRUE(notMatches("struct x;",
-                      cxxRecordDecl(hasDefinition())));
+TEST_P(ASTMatchersTest, HasDefinition_MatchesStructDefinition) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("struct x {};", cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("struct x;", cxxRecordDecl(hasDefinition())));
 }
 
-TEST(HasDefinition, MatchesClassDefinition) {
-  EXPECT_TRUE(matches("class x {};",
-                      cxxRecordDecl(hasDefinition())));
-  EXPECT_TRUE(notMatches("class x;",
-                      cxxRecordDecl(hasDefinition())));
+TEST_P(ASTMatchersTest, HasDefinition_MatchesClassDefinition) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("class x {};", cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("class x;", cxxRecordDecl(hasDefinition())));
 }
 
-TEST(HasDefinition, MatchesUnionDefinition) {
-  EXPECT_TRUE(matches("union x {};",
-                      cxxRecordDecl(hasDefinition())));
-  EXPECT_TRUE(notMatches("union x;",
-                      cxxRecordDecl(hasDefinition())));
+TEST_P(ASTMatchersTest, HasDefinition_MatchesUnionDefinition) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("union x {};", cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("union x;", cxxRecordDecl(hasDefinition())));
 }
 
-TEST(IsScopedEnum, MatchesScopedEnum) {
+TEST_P(ASTMatchersTest, IsScoped_MatchesScopedEnum) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
   EXPECT_TRUE(matches("enum class X {};", enumDecl(isScoped())));
-  EXPECT_TRUE(notMatches("enum X {};", enumDecl(isScoped())));
 }
 
-TEST(TagDeclKind, MatchesTagDeclKind) {
-  EXPECT_TRUE(matches("struct X {};", tagDecl(isStruct())));
-  EXPECT_TRUE(matches("class C {};", tagDecl(isClass())));
+TEST_P(ASTMatchersTest, IsScoped_NotMatchesRegularEnum) {
+  EXPECT_TRUE(notMatches("enum E { E1 };", enumDecl(isScoped())));
+}
+
+TEST_P(ASTMatchersTest, IsStruct) {
+  EXPECT_TRUE(matches("struct S {};", tagDecl(isStruct())));
+}
+
+TEST_P(ASTMatchersTest, IsUnion) {
   EXPECT_TRUE(matches("union U {};", tagDecl(isUnion())));
-  EXPECT_TRUE(matches("enum E {};", tagDecl(isEnum())));
 }
 
-TEST(HasTrailingReturn, MatchesTrailingReturn) {
+TEST_P(ASTMatchersTest, IsEnum) {
+  EXPECT_TRUE(matches("enum E { E1 };", tagDecl(isEnum())));
+}
+
+TEST_P(ASTMatchersTest, IsClass) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
+  EXPECT_TRUE(matches("class C {};", tagDecl(isClass())));
+}
+
+TEST_P(ASTMatchersTest, HasTrailingReturn_MatchesTrailingReturn) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches("auto Y() -> int { return 0; }",
                       functionDecl(hasTrailingReturn())));
   EXPECT_TRUE(matches("auto X() -> int;", functionDecl(hasTrailingReturn())));
-  EXPECT_TRUE(notMatches("int X() { return 0; }",
-                      functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(
+      notMatches("int X() { return 0; }", functionDecl(hasTrailingReturn())));
   EXPECT_TRUE(notMatches("int X();", functionDecl(hasTrailingReturn())));
-  EXPECT_TRUE(notMatchesC("void X();", functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(notMatches("void X();", functionDecl(hasTrailingReturn())));
 }
 
-TEST(HasTrailingReturn, MatchesLambdaTrailingReturn) {
+TEST_P(ASTMatchersTest, HasTrailingReturn_MatchesLambdaTrailingReturn) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+
   EXPECT_TRUE(matches(
-          "auto lambda2 = [](double x, double y) -> double {return x + y;};",
-          functionDecl(hasTrailingReturn())));
-  EXPECT_TRUE(notMatches(
-          "auto lambda2 = [](double x, double y) {return x + y;};",
-          functionDecl(hasTrailingReturn())));
+      "auto lambda2 = [](double x, double y) -> double {return x + y;};",
+      functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(
+      notMatches("auto lambda2 = [](double x, double y) {return x + y;};",
+                 functionDecl(hasTrailingReturn())));
 }
 
-TEST(IsAssignmentOperator, Basic) {
+TEST_P(ASTMatchersTest, IsAssignmentOperator) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   StatementMatcher BinAsgmtOperator = binaryOperator(isAssignmentOperator());
   StatementMatcher CXXAsgmtOperator =
       cxxOperatorCallExpr(isAssignmentOperator());
@@ -2757,7 +3332,11 @@ TEST(IsAssignmentOperator, Basic) {
       notMatches("void x() { int a; if(a == 0) return; }", BinAsgmtOperator));
 }
 
-TEST(IsComparisonOperator, Basic) {
+TEST_P(ASTMatchersTest, IsComparisonOperator) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   StatementMatcher BinCompOperator = binaryOperator(isComparisonOperator());
   StatementMatcher CXXCompOperator =
       cxxOperatorCallExpr(isComparisonOperator());
@@ -2771,27 +3350,24 @@ TEST(IsComparisonOperator, Basic) {
       notMatches("void x() { int a; if(a = 0) return; }", BinCompOperator));
 }
 
-TEST(HasInit, Basic) {
-  EXPECT_TRUE(
-    matches("int x{0};",
-            initListExpr(hasInit(0, expr()))));
-  EXPECT_FALSE(
-    matches("int x{0};",
-            initListExpr(hasInit(1, expr()))));
-  EXPECT_FALSE(
-    matches("int x;",
-            initListExpr(hasInit(0, expr()))));
+TEST_P(ASTMatchersTest, HasInit) {
+  if (!GetParam().isCXX11OrLater()) {
+    // FIXME: Add a test for `hasInit()` that does not depend on C++.
+    return;
+  }
+
+  EXPECT_TRUE(matches("int x{0};", initListExpr(hasInit(0, expr()))));
+  EXPECT_FALSE(matches("int x{0};", initListExpr(hasInit(1, expr()))));
+  EXPECT_FALSE(matches("int x;", initListExpr(hasInit(0, expr()))));
 }
 
-TEST(Matcher, isMain) {
-  EXPECT_TRUE(
-    matches("int main() {}", functionDecl(isMain())));
+TEST_P(ASTMatchersTest, IsMain) {
+  EXPECT_TRUE(matches("int main() {}", functionDecl(isMain())));
 
-  EXPECT_TRUE(
-    notMatches("int main2() {}", functionDecl(isMain())));
+  EXPECT_TRUE(notMatches("int main2() {}", functionDecl(isMain())));
 }
 
-TEST(OMPExecutableDirective, isStandaloneDirective) {
+TEST_P(ASTMatchersTest, OMPExecutableDirective_IsStandaloneDirective) {
   auto Matcher = ompExecutableDirective(isStandaloneDirective());
 
   StringRef Source0 = R"(
@@ -2808,7 +3384,7 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source1, Matcher));
 }
 
-TEST(OMPExecutableDirective, hasStructuredBlock) {
+TEST_P(ASTMatchersTest, OMPExecutableDirective_HasStructuredBlock) {
   StringRef Source0 = R"(
 void x() {
 #pragma omp parallel
@@ -2836,7 +3412,7 @@ void x() {
       Source2, ompExecutableDirective(hasStructuredBlock(anything()))));
 }
 
-TEST(OMPExecutableDirective, hasClause) {
+TEST_P(ASTMatchersTest, OMPExecutableDirective_HasClause) {
   auto Matcher = ompExecutableDirective(hasAnyClause(anything()));
 
   StringRef Source0 = R"(
@@ -2867,14 +3443,21 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source3, Matcher));
 
   StringRef Source4 = R"(
+void x() {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
 void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(matchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(matchesWithOpenMP(Source5, Matcher));
 }
 
-TEST(OMPDefaultClause, isNoneKind) {
+TEST_P(ASTMatchersTest, OMPDefaultClause_IsNoneKind) {
   auto Matcher =
       ompExecutableDirective(hasAnyClause(ompDefaultClause(isNoneKind())));
 
@@ -2907,13 +3490,20 @@ void x() {
 
   StringRef Source4 = R"(
 void x(int x) {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
+void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
 }
 
-TEST(OMPDefaultClause, isSharedKind) {
+TEST_P(ASTMatchersTest, OMPDefaultClause_IsSharedKind) {
   auto Matcher =
       ompExecutableDirective(hasAnyClause(ompDefaultClause(isSharedKind())));
 
@@ -2946,13 +3536,66 @@ void x() {
 
   StringRef Source4 = R"(
 void x(int x) {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
+void x(int x) {
+#pragma omp parallel num_threads(x)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
+}
+
+TEST_P(ASTMatchersTest, OMPDefaultClause_IsFirstPrivateKind) {
+  auto Matcher = ompExecutableDirective(
+      hasAnyClause(ompDefaultClause(isFirstPrivateKind())));
+
+  StringRef Source0 = R"(
+void x() {
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source0, Matcher));
+
+  StringRef Source1 = R"(
+void x() {
+#pragma omp parallel
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source1, Matcher));
+
+  StringRef Source2 = R"(
+void x() {
+#pragma omp parallel default(shared)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source2, Matcher));
+
+  StringRef Source3 = R"(
+void x() {
+#pragma omp parallel default(none)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source3, Matcher));
+
+  StringRef Source4 = R"(
+void x(int x) {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
+void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
 }
 
-TEST(OMPExecutableDirective, isAllowedToContainClauseKind) {
+TEST_P(ASTMatchersTest, OMPExecutableDirective_IsAllowedToContainClauseKind) {
   auto Matcher = ompExecutableDirective(
       isAllowedToContainClauseKind(llvm::omp::OMPC_default));
 
@@ -2984,27 +3627,37 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source3, Matcher));
 
   StringRef Source4 = R"(
+void x() {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
 void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(matchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(matchesWithOpenMP(Source5, Matcher));
 
-  StringRef Source5 = R"(
+  StringRef Source6 = R"(
 void x() {
 #pragma omp taskyield
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher));
 
-  StringRef Source6 = R"(
+  StringRef Source7 = R"(
 void x() {
 #pragma omp task
 ;
 })";
-  EXPECT_TRUE(matchesWithOpenMP(Source6, Matcher));
+  EXPECT_TRUE(matchesWithOpenMP(Source7, Matcher));
 }
 
-TEST(HasAnyBase, DirectBase) {
+TEST_P(ASTMatchersTest, HasAnyBase_DirectBase) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches(
       "struct Base {};"
       "struct ExpectedMatch : Base {};",
@@ -3012,7 +3665,10 @@ TEST(HasAnyBase, DirectBase) {
                     hasAnyBase(hasType(cxxRecordDecl(hasName("Base")))))));
 }
 
-TEST(HasAnyBase, IndirectBase) {
+TEST_P(ASTMatchersTest, HasAnyBase_IndirectBase) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches(
       "struct Base {};"
       "struct Intermediate : Base {};"
@@ -3021,97 +3677,145 @@ TEST(HasAnyBase, IndirectBase) {
                     hasAnyBase(hasType(cxxRecordDecl(hasName("Base")))))));
 }
 
-TEST(HasAnyBase, NoBase) {
+TEST_P(ASTMatchersTest, HasAnyBase_NoBase) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("struct Foo {};"
                          "struct Bar {};",
                          cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl())))));
 }
 
-TEST(IsPublicBase, Public) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPublic_Public) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches("class Base {};"
                       "class Derived : public Base {};",
                       cxxRecordDecl(hasAnyBase(isPublic()))));
 }
 
-TEST(IsPublicBase, DefaultAccessSpecifierPublic) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPublic_DefaultAccessSpecifierPublic) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches("class Base {};"
                       "struct Derived : Base {};",
                       cxxRecordDecl(hasAnyBase(isPublic()))));
 }
 
-TEST(IsPublicBase, Private) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPublic_Private) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : private Base {};",
                          cxxRecordDecl(hasAnyBase(isPublic()))));
 }
 
-TEST(IsPublicBase, DefaultAccessSpecifierPrivate) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPublic_DefaultAccessSpecifierPrivate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : Base {};",
                          cxxRecordDecl(hasAnyBase(isPublic()))));
 }
 
-TEST(IsPublicBase, Protected) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPublic_Protected) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : protected Base {};",
                          cxxRecordDecl(hasAnyBase(isPublic()))));
 }
 
-TEST(IsPrivateBase, Private) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPrivate_Private) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches("class Base {};"
                       "class Derived : private Base {};",
                       cxxRecordDecl(hasAnyBase(isPrivate()))));
 }
 
-TEST(IsPrivateBase, DefaultAccessSpecifierPrivate) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPrivate_DefaultAccessSpecifierPrivate) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches("struct Base {};"
                       "class Derived : Base {};",
                       cxxRecordDecl(hasAnyBase(isPrivate()))));
 }
 
-TEST(IsPrivateBase, Public) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPrivate_Public) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : public Base {};",
                          cxxRecordDecl(hasAnyBase(isPrivate()))));
 }
 
-TEST(IsPrivateBase, DefaultAccessSpecifierPublic) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPrivate_DefaultAccessSpecifierPublic) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "struct Derived : Base {};",
                          cxxRecordDecl(hasAnyBase(isPrivate()))));
 }
 
-TEST(IsPrivateBase, Protected) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsPrivate_Protected) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : protected Base {};",
                          cxxRecordDecl(hasAnyBase(isPrivate()))));
 }
 
-TEST(IsProtectedBase, Protected) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsProtected_Protected) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches("class Base {};"
                       "class Derived : protected Base {};",
                       cxxRecordDecl(hasAnyBase(isProtected()))));
 }
 
-TEST(IsProtectedBase, Public) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsProtected_Public) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : public Base {};",
                          cxxRecordDecl(hasAnyBase(isProtected()))));
 }
 
-TEST(IsProtectedBase, Private) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsProtected_Private) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : private Base {};",
                          cxxRecordDecl(hasAnyBase(isProtected()))));
 }
 
-TEST(IsVirtual, Directly) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsVirtual_Directly) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(matches("class Base {};"
                       "class Derived : virtual Base {};",
                       cxxRecordDecl(hasAnyBase(isVirtual()))));
 }
 
-TEST(IsVirtual, Indirectly) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsVirtual_Indirectly) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(
       matches("class Base {};"
               "class Intermediate : virtual Base {};"
@@ -3119,13 +3823,20 @@ TEST(IsVirtual, Indirectly) {
               cxxRecordDecl(hasName("Derived"), hasAnyBase(isVirtual()))));
 }
 
-TEST(IsVirtual, NoVirtualBase) {
+TEST_P(ASTMatchersTest, HasAnyBase_IsVirtual_NoVirtualBase) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
   EXPECT_TRUE(notMatches("class Base {};"
                          "class Derived : Base {};",
                          cxxRecordDecl(hasAnyBase(isVirtual()))));
 }
 
-TEST(BaseSpecifier, hasDirectBase) {
+TEST_P(ASTMatchersTest, HasDirectBase) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+
   EXPECT_TRUE(matches(
       R"cc(
     class Base {};
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index 59e0f74b3910b..895c8ae48adc1 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -118,13 +118,13 @@ TEST_P(ASTMatchersTest, TranslationUnitDecl) {
                    "int MyVar2;\n"
                    "}  // namespace NameSpace\n";
   EXPECT_TRUE(matches(
-    Code, varDecl(hasName("MyVar1"), hasDeclContext(translationUnitDecl()))));
+      Code, varDecl(hasName("MyVar1"), hasDeclContext(translationUnitDecl()))));
   EXPECT_FALSE(matches(
-    Code, varDecl(hasName("MyVar2"), hasDeclContext(translationUnitDecl()))));
+      Code, varDecl(hasName("MyVar2"), hasDeclContext(translationUnitDecl()))));
   EXPECT_TRUE(matches(
-    Code,
-    varDecl(hasName("MyVar2"),
-            hasDeclContext(decl(hasDeclContext(translationUnitDecl()))))));
+      Code,
+      varDecl(hasName("MyVar2"),
+              hasDeclContext(decl(hasDeclContext(translationUnitDecl()))))));
 }
 
 TEST_P(ASTMatchersTest, LinkageSpecDecl) {
@@ -158,10 +158,10 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(notMatches("template class X { };"
-                           "template<> class X { int a; };",
-                         classTemplateDecl(hasName("X"),
-                                           hasDescendant(fieldDecl(hasName("a"))))));
+  EXPECT_TRUE(notMatches(
+      "template class X { };"
+      "template<> class X { int a; };",
+      classTemplateDecl(hasName("X"), hasDescendant(fieldDecl(hasName("a"))))));
 }
 
 TEST_P(ASTMatchersTest,
@@ -169,18 +169,17 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(notMatches("template class X { };"
-                           "template class X { int a; };",
-                         classTemplateDecl(hasName("X"),
-                                           hasDescendant(fieldDecl(hasName("a"))))));
+  EXPECT_TRUE(notMatches(
+      "template class X { };"
+      "template class X { int a; };",
+      classTemplateDecl(hasName("X"), hasDescendant(fieldDecl(hasName("a"))))));
 }
 
 TEST(ASTMatchersTestCUDA, CUDAKernelCallExpr) {
   EXPECT_TRUE(matchesWithCuda("__global__ void f() { }"
-                                "void g() { f<<<1, 2>>>(); }",
+                              "void g() { f<<<1, 2>>>(); }",
                               cudaKernelCallExpr()));
-  EXPECT_TRUE(notMatchesWithCuda("void f() {}",
-                                 cudaKernelCallExpr()));
+  EXPECT_TRUE(notMatchesWithCuda("void f() {}", cudaKernelCallExpr()));
 }
 
 TEST(ASTMatchersTestCUDA, HasAttrCUDA) {
@@ -316,56 +315,50 @@ TEST_P(ASTMatchersTest, CallExpr_CXX) {
   // FIXME: Do we want to overload Call() to directly take
   // Matcher, too?
   StatementMatcher MethodX =
-    callExpr(hasDeclaration(cxxMethodDecl(hasName("x"))));
+      callExpr(hasDeclaration(cxxMethodDecl(hasName("x"))));
 
   EXPECT_TRUE(matches("class Y { void x() { x(); } };", MethodX));
   EXPECT_TRUE(notMatches("class Y { void x() {} };", MethodX));
 
   StatementMatcher MethodOnY =
-    cxxMemberCallExpr(on(hasType(recordDecl(hasName("Y")))));
+      cxxMemberCallExpr(on(hasType(recordDecl(hasName("Y")))));
 
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
-               MethodOnY));
-  EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
-               MethodOnY));
-  EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
-               MethodOnY));
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
+                      MethodOnY));
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
+                      MethodOnY));
+  EXPECT_TRUE(notMatches(
+      "class Y { public: void x(); }; void z(Y *&y) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(notMatches(
+      "class Y { public: void x(); }; void z(Y y[]) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(notMatches(
+      "class Y { public: void x(); }; void z() { Y *y; y->x(); }", MethodOnY));
 
   StatementMatcher MethodOnYPointer =
-    cxxMemberCallExpr(on(hasType(pointsTo(recordDecl(hasName("Y"))))));
+      cxxMemberCallExpr(on(hasType(pointsTo(recordDecl(hasName("Y"))))));
 
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
-            MethodOnYPointer));
+      matches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
+              MethodOnYPointer));
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
-            MethodOnYPointer));
+      matches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
+              MethodOnYPointer));
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
-            MethodOnYPointer));
+      matches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
+              MethodOnYPointer));
   EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
-               MethodOnYPointer));
+      notMatches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
+                 MethodOnYPointer));
   EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
-               MethodOnYPointer));
+      notMatches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
+                 MethodOnYPointer));
 }
 
 TEST_P(ASTMatchersTest, LambdaExpr) {
   if (!GetParam().isCXX11OrLater()) {
     return;
   }
-  EXPECT_TRUE(matches("auto f = [] (int i) { return i; };",
-                      lambdaExpr()));
+  EXPECT_TRUE(matches("auto f = [] (int i) { return i; };", lambdaExpr()));
 }
 
 TEST_P(ASTMatchersTest, CXXForRangeStmt) {
@@ -378,7 +371,7 @@ TEST_P(ASTMatchersTest, CXXForRangeStmt_CXX11) {
     return;
   }
   EXPECT_TRUE(matches("int as[] = { 1, 2, 3 };"
-                        "void f() { for (auto &a : as); }",
+                      "void f() { for (auto &a : as); }",
                       cxxForRangeStmt()));
 }
 
@@ -387,15 +380,13 @@ TEST_P(ASTMatchersTest, SubstNonTypeTemplateParmExpr) {
     return;
   }
   EXPECT_FALSE(matches("template\n"
-                         "struct A {  static const int n = 0; };\n"
-                         "struct B : public A<42> {};",
-                         traverse(TK_AsIs,
-                       substNonTypeTemplateParmExpr())));
+                       "struct A {  static const int n = 0; };\n"
+                       "struct B : public A<42> {};",
+                       traverse(TK_AsIs, substNonTypeTemplateParmExpr())));
   EXPECT_TRUE(matches("template\n"
-                        "struct A {  static const int n = N; };\n"
-                        "struct B : public A<42> {};",
-                         traverse(TK_AsIs,
-                      substNonTypeTemplateParmExpr())));
+                      "struct A {  static const int n = N; };\n"
+                      "struct B : public A<42> {};",
+                      traverse(TK_AsIs, substNonTypeTemplateParmExpr())));
 }
 
 TEST_P(ASTMatchersTest, NonTypeTemplateParmDecl) {
@@ -405,7 +396,7 @@ TEST_P(ASTMatchersTest, NonTypeTemplateParmDecl) {
   EXPECT_TRUE(matches("template  void f();",
                       nonTypeTemplateParmDecl(hasName("N"))));
   EXPECT_TRUE(
-    notMatches("template  void f();", nonTypeTemplateParmDecl()));
+      notMatches("template  void f();", nonTypeTemplateParmDecl()));
 }
 
 TEST_P(ASTMatchersTest, TemplateTypeParmDecl) {
@@ -414,8 +405,7 @@ TEST_P(ASTMatchersTest, TemplateTypeParmDecl) {
   }
   EXPECT_TRUE(matches("template  void f();",
                       templateTypeParmDecl(hasName("T"))));
-  EXPECT_TRUE(
-    notMatches("template  void f();", templateTypeParmDecl()));
+  EXPECT_TRUE(notMatches("template  void f();", templateTypeParmDecl()));
 }
 
 TEST_P(ASTMatchersTest, UserDefinedLiteral) {
@@ -423,9 +413,9 @@ TEST_P(ASTMatchersTest, UserDefinedLiteral) {
     return;
   }
   EXPECT_TRUE(matches("constexpr char operator \"\" _inc (const char i) {"
-                        "  return i + 1;"
-                        "}"
-                        "char c = 'a'_inc;",
+                      "  return i + 1;"
+                      "}"
+                      "char c = 'a'_inc;",
                       userDefinedLiteral()));
 }
 
@@ -434,9 +424,7 @@ TEST_P(ASTMatchersTest, FlowControl) {
   EXPECT_TRUE(matches("void f() { while(1) { continue; } }", continueStmt()));
   EXPECT_TRUE(matches("void f() { goto FOO; FOO: ;}", gotoStmt()));
   EXPECT_TRUE(matches("void f() { goto FOO; FOO: ;}",
-                      labelStmt(
-                        hasDeclaration(
-                          labelDecl(hasName("FOO"))))));
+                      labelStmt(hasDeclaration(labelDecl(hasName("FOO"))))));
   EXPECT_TRUE(matches("void f() { FOO: ; void *ptr = &&FOO; goto *ptr; }",
                       addrLabelExpr()));
   EXPECT_TRUE(matches("void f() { return; }", returnStmt()));
@@ -450,8 +438,9 @@ TEST_P(ASTMatchersTest, CXXOperatorCallExpr) {
   StatementMatcher OpCall = cxxOperatorCallExpr();
   // Unary operator
   EXPECT_TRUE(matches("class Y { }; "
-                        "bool operator!(Y x) { return false; }; "
-                        "Y y; bool c = !y;", OpCall));
+                      "bool operator!(Y x) { return false; }; "
+                      "Y y; bool c = !y;",
+                      OpCall));
   // No match -- special operators like "new", "delete"
   // FIXME: operator new takes size_t, for which we need stddef.h, for which
   // we need to figure out include paths in the test.
@@ -460,12 +449,13 @@ TEST_P(ASTMatchersTest, CXXOperatorCallExpr) {
   //             "void *operator new(size_t size) { return 0; } "
   //             "Y *y = new Y;", OpCall));
   EXPECT_TRUE(notMatches("class Y { }; "
-                           "void operator delete(void *p) { } "
-                           "void a() {Y *y = new Y; delete y;}", OpCall));
+                         "void operator delete(void *p) { } "
+                         "void a() {Y *y = new Y; delete y;}",
+                         OpCall));
   // Binary operator
   EXPECT_TRUE(matches("class Y { }; "
-                        "bool operator&&(Y x, Y y) { return true; }; "
-                        "Y a; Y b; bool c = a && b;",
+                      "bool operator&&(Y x, Y y) { return true; }; "
+                      "Y a; Y b; bool c = a && b;",
                       OpCall));
   // No match -- normal operator, not an overloaded one.
   EXPECT_TRUE(notMatches("bool x = true, y = true; bool t = x && y;", OpCall));
@@ -481,30 +471,25 @@ TEST_P(ASTMatchersTest, ThisPointerType) {
       traverse(ast_type_traits::TK_AsIs,
                cxxMemberCallExpr(thisPointerType(recordDecl(hasName("Y")))));
 
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
-            MethodOnY));
-
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
+                      MethodOnY));
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
+                      MethodOnY));
   EXPECT_TRUE(matches(
-    "class Y {"
-      "  public: virtual void x();"
-      "};"
-      "class X : public Y {"
-      "  public: virtual void x();"
-      "};"
-      "void z() { X *x; x->Y::x(); }", MethodOnY));
+      "class Y { public: void x(); }; void z(Y *&y) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(matches(
+      "class Y { public: void x(); }; void z(Y y[]) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(matches(
+      "class Y { public: void x(); }; void z() { Y *y; y->x(); }", MethodOnY));
+
+  EXPECT_TRUE(matches("class Y {"
+                      "  public: virtual void x();"
+                      "};"
+                      "class X : public Y {"
+                      "  public: virtual void x();"
+                      "};"
+                      "void z() { X *x; x->Y::x(); }",
+                      MethodOnY));
 }
 
 TEST_P(ASTMatchersTest, DeclRefExpr) {
@@ -512,29 +497,27 @@ TEST_P(ASTMatchersTest, DeclRefExpr) {
     // FIXME: Add a test for `declRefExpr()` that does not depend on C++.
     return;
   }
-  StatementMatcher Reference =
-    declRefExpr(to(
-      varDecl(hasInitializer(
-        cxxMemberCallExpr(thisPointerType(recordDecl(hasName("Y"))))))));
+  StatementMatcher Reference = declRefExpr(to(varDecl(hasInitializer(
+      cxxMemberCallExpr(thisPointerType(recordDecl(hasName("Y"))))))));
 
-  EXPECT_TRUE(matches(
-    "class Y {"
-      " public:"
-      "  bool x() const;"
-      "};"
-      "void z(const Y &y) {"
-      "  bool b = y.x();"
-      "  if (b) {}"
-      "}", Reference));
+  EXPECT_TRUE(matches("class Y {"
+                      " public:"
+                      "  bool x() const;"
+                      "};"
+                      "void z(const Y &y) {"
+                      "  bool b = y.x();"
+                      "  if (b) {}"
+                      "}",
+                      Reference));
 
-  EXPECT_TRUE(notMatches(
-    "class Y {"
-      " public:"
-      "  bool x() const;"
-      "};"
-      "void z(const Y &y) {"
-      "  bool b = y.x();"
-      "}", Reference));
+  EXPECT_TRUE(notMatches("class Y {"
+                         " public:"
+                         "  bool x() const;"
+                         "};"
+                         "void z(const Y &y) {"
+                         "  bool b = y.x();"
+                         "}",
+                         Reference));
 }
 
 TEST_P(ASTMatchersTest, CXXMemberCallExpr) {
@@ -542,32 +525,32 @@ TEST_P(ASTMatchersTest, CXXMemberCallExpr) {
     return;
   }
   StatementMatcher CallOnVariableY =
-    cxxMemberCallExpr(on(declRefExpr(to(varDecl(hasName("y"))))));
-
-  EXPECT_TRUE(matches(
-    "class Y { public: void x() { Y y; y.x(); } };", CallOnVariableY));
-  EXPECT_TRUE(matches(
-    "class Y { public: void x() const { Y y; y.x(); } };", CallOnVariableY));
-  EXPECT_TRUE(matches(
-    "class Y { public: void x(); };"
-      "class X : public Y { void z() { X y; y.x(); } };", CallOnVariableY));
-  EXPECT_TRUE(matches(
-    "class Y { public: void x(); };"
-      "class X : public Y { void z() { X *y; y->x(); } };", CallOnVariableY));
+      cxxMemberCallExpr(on(declRefExpr(to(varDecl(hasName("y"))))));
+
+  EXPECT_TRUE(matches("class Y { public: void x() { Y y; y.x(); } };",
+                      CallOnVariableY));
+  EXPECT_TRUE(matches("class Y { public: void x() const { Y y; y.x(); } };",
+                      CallOnVariableY));
+  EXPECT_TRUE(matches("class Y { public: void x(); };"
+                      "class X : public Y { void z() { X y; y.x(); } };",
+                      CallOnVariableY));
+  EXPECT_TRUE(matches("class Y { public: void x(); };"
+                      "class X : public Y { void z() { X *y; y->x(); } };",
+                      CallOnVariableY));
   EXPECT_TRUE(notMatches(
-    "class Y { public: void x(); };"
+      "class Y { public: void x(); };"
       "class X : public Y { void z() { unsigned long y; ((X*)y)->x(); } };",
-    CallOnVariableY));
+      CallOnVariableY));
 }
 
 TEST_P(ASTMatchersTest, UnaryExprOrTypeTraitExpr) {
-  EXPECT_TRUE(matches("void x() { int a = sizeof(a); }",
-                      unaryExprOrTypeTraitExpr()));
+  EXPECT_TRUE(
+      matches("void x() { int a = sizeof(a); }", unaryExprOrTypeTraitExpr()));
 }
 
 TEST_P(ASTMatchersTest, AlignOfExpr) {
-  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }",
-                         alignOfExpr(anything())));
+  EXPECT_TRUE(
+      notMatches("void x() { int a = sizeof(a); }", alignOfExpr(anything())));
   // FIXME: Uncomment once alignof is enabled.
   // EXPECT_TRUE(matches("void x() { int a = alignof(a); }",
   //                     unaryExprOrTypeTraitExpr()));
@@ -603,11 +586,10 @@ TEST_P(ASTMatchersTest, MemberExpr_MatchesVariable) {
     return;
   }
   EXPECT_TRUE(
-    matches("class Y { void x() { this->y; } int y; };", memberExpr()));
-  EXPECT_TRUE(
-    matches("class Y { void x() { y; } int y; };", memberExpr()));
+      matches("class Y { void x() { this->y; } int y; };", memberExpr()));
+  EXPECT_TRUE(matches("class Y { void x() { y; } int y; };", memberExpr()));
   EXPECT_TRUE(
-    matches("class Y { void x() { Y y; y.y; } int y; };", memberExpr()));
+      matches("class Y { void x() { Y y; y.y; } int y; };", memberExpr()));
   EXPECT_TRUE(matches("template "
                       "class X : T { void f() { this->T::v; } };",
                       cxxDependentScopeMemberExpr()));
@@ -623,8 +605,8 @@ TEST_P(ASTMatchersTest, MemberExpr_MatchesStaticVariable) {
   }
   EXPECT_TRUE(matches("class Y { void x() { this->y; } static int y; };",
                       memberExpr()));
-  EXPECT_TRUE(notMatches("class Y { void x() { y; } static int y; };",
-                         memberExpr()));
+  EXPECT_TRUE(
+      notMatches("class Y { void x() { y; } static int y; };", memberExpr()));
   EXPECT_TRUE(notMatches("class Y { void x() { Y::y; } static int y; };",
                          memberExpr()));
 }
@@ -658,21 +640,21 @@ TEST_P(ASTMatchersTest, FunctionDecl_CXX) {
   if (!GetParam().hasDelayedTemplateParsing()) {
     // FIXME: Fix this test to work with delayed template parsing.
     // Dependent contexts, but a non-dependent call.
-    EXPECT_TRUE(matches("void f(); template  void g() { f(); }",
-                        CallFunctionF));
     EXPECT_TRUE(
-      matches("void f(); template  struct S { void g() { f(); } };",
-              CallFunctionF));
+        matches("void f(); template  void g() { f(); }", CallFunctionF));
+    EXPECT_TRUE(
+        matches("void f(); template  struct S { void g() { f(); } };",
+                CallFunctionF));
   }
 
   // Depedent calls don't match.
   EXPECT_TRUE(
-    notMatches("void f(int); template  void g(T t) { f(t); }",
-               CallFunctionF));
+      notMatches("void f(int); template  void g(T t) { f(t); }",
+                 CallFunctionF));
   EXPECT_TRUE(
-    notMatches("void f(int);"
+      notMatches("void f(int);"
                  "template  struct S { void g(T t) { f(t); } };",
-               CallFunctionF));
+                 CallFunctionF));
 
   EXPECT_TRUE(matches("void f(...);", functionDecl(isVariadic())));
   EXPECT_TRUE(matches("void f(...);", functionDecl(parameterCountIs(0))));
@@ -692,9 +674,8 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(
-    matches("template  void f(T t) {}",
-            functionTemplateDecl(hasName("f"))));
+  EXPECT_TRUE(matches("template  void f(T t) {}",
+                      functionTemplateDecl(hasName("f"))));
 }
 
 TEST_P(ASTMatchersTest, FunctionTemplate_DoesNotMatchFunctionDeclarations) {
@@ -709,12 +690,11 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(
-    notMatches("void g(); template  void f(T t) {}"
-                 "template <> void f(int t) { g(); }",
-               functionTemplateDecl(hasName("f"),
-                                    hasDescendant(declRefExpr(to(
-                                      functionDecl(hasName("g"))))))));
+  EXPECT_TRUE(notMatches(
+      "void g(); template  void f(T t) {}"
+      "template <> void f(int t) { g(); }",
+      functionTemplateDecl(hasName("f"), hasDescendant(declRefExpr(to(
+                                             functionDecl(hasName("g"))))))));
 }
 
 TEST_P(ASTMatchersTest, ClassTemplateSpecializationDecl) {
@@ -722,7 +702,7 @@ TEST_P(ASTMatchersTest, ClassTemplateSpecializationDecl) {
     return;
   }
   EXPECT_TRUE(matches("template struct A {};"
-                        "template<> struct A {};",
+                      "template<> struct A {};",
                       classTemplateSpecializationDecl()));
   EXPECT_TRUE(matches("template struct A {}; A a;",
                       classTemplateSpecializationDecl()));
@@ -756,13 +736,11 @@ TEST_P(ASTMatchersTest, Matcher_ConstructorCall) {
       traverse(ast_type_traits::TK_AsIs, cxxConstructExpr());
 
   EXPECT_TRUE(
-    matches("class X { public: X(); }; void x() { X x; }", Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(); }; void x() { X x = X(); }",
-            Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x = 0; }",
-            Constructor));
+      matches("class X { public: X(); }; void x() { X x; }", Constructor));
+  EXPECT_TRUE(matches("class X { public: X(); }; void x() { X x = X(); }",
+                      Constructor));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x = 0; }",
+                      Constructor));
   EXPECT_TRUE(matches("class X {}; void x(int) { X x; }", Constructor));
 }
 
@@ -779,9 +757,9 @@ TEST_P(ASTMatchersTest, Matcher_ThisExpr) {
     return;
   }
   EXPECT_TRUE(
-    matches("struct X { int a; int f () { return a; } };", cxxThisExpr()));
+      matches("struct X { int a; int f () { return a; } };", cxxThisExpr()));
   EXPECT_TRUE(
-    notMatches("struct X { int f () { int a; return a; } };", cxxThisExpr()));
+      notMatches("struct X { int f () { int a; return a; } };", cxxThisExpr()));
 }
 
 TEST_P(ASTMatchersTest, Matcher_BindTemporaryExpression) {
@@ -794,30 +772,27 @@ TEST_P(ASTMatchersTest, Matcher_BindTemporaryExpression) {
 
   StringRef ClassString = "class string { public: string(); ~string(); }; ";
 
-  EXPECT_TRUE(
-    matches(ClassString +
-              "string GetStringByValue();"
-                "void FunctionTakesString(string s);"
-                "void run() { FunctionTakesString(GetStringByValue()); }",
-            TempExpression));
+  EXPECT_TRUE(matches(
+      ClassString + "string GetStringByValue();"
+                    "void FunctionTakesString(string s);"
+                    "void run() { FunctionTakesString(GetStringByValue()); }",
+      TempExpression));
 
-  EXPECT_TRUE(
-    notMatches(ClassString +
-                 "string* GetStringPointer(); "
-                   "void FunctionTakesStringPtr(string* s);"
-                   "void run() {"
-                   "  string* s = GetStringPointer();"
-                   "  FunctionTakesStringPtr(GetStringPointer());"
-                   "  FunctionTakesStringPtr(s);"
-                   "}",
-               TempExpression));
+  EXPECT_TRUE(notMatches(ClassString +
+                             "string* GetStringPointer(); "
+                             "void FunctionTakesStringPtr(string* s);"
+                             "void run() {"
+                             "  string* s = GetStringPointer();"
+                             "  FunctionTakesStringPtr(GetStringPointer());"
+                             "  FunctionTakesStringPtr(s);"
+                             "}",
+                         TempExpression));
 
-  EXPECT_TRUE(
-    notMatches("class no_dtor {};"
-                 "no_dtor GetObjByValue();"
-                 "void ConsumeObj(no_dtor param);"
-                 "void run() { ConsumeObj(GetObjByValue()); }",
-               TempExpression));
+  EXPECT_TRUE(notMatches("class no_dtor {};"
+                         "no_dtor GetObjByValue();"
+                         "void ConsumeObj(no_dtor param);"
+                         "void run() { ConsumeObj(GetObjByValue()); }",
+                         TempExpression));
 }
 
 TEST_P(ASTMatchersTest, MaterializeTemporaryExpr_MatchesTemporaryCXX11CXX14) {
@@ -872,10 +847,9 @@ TEST_P(ASTMatchersTest, Matcher_NewExpression) {
   StatementMatcher New = cxxNewExpr();
 
   EXPECT_TRUE(matches("class X { public: X(); }; void x() { new X; }", New));
+  EXPECT_TRUE(matches("class X { public: X(); }; void x() { new X(); }", New));
   EXPECT_TRUE(
-    matches("class X { public: X(); }; void x() { new X(); }", New));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { new X(0); }", New));
+      matches("class X { public: X(int); }; void x() { new X(0); }", New));
   EXPECT_TRUE(matches("class X {}; void x(int) { new X; }", New));
 }
 
@@ -883,8 +857,8 @@ TEST_P(ASTMatchersTest, Matcher_DeleteExpression) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("struct A {}; void f(A* a) { delete a; }",
-                      cxxDeleteExpr()));
+  EXPECT_TRUE(
+      matches("struct A {}; void f(A* a) { delete a; }", cxxDeleteExpr()));
 }
 
 TEST_P(ASTMatchersTest, Matcher_NoexceptExpression) {
@@ -907,7 +881,7 @@ TEST_P(ASTMatchersTest, Matcher_DefaultArgument) {
   StatementMatcher Arg = cxxDefaultArgExpr();
   EXPECT_TRUE(matches("void x(int, int = 0) { int y; x(y); }", Arg));
   EXPECT_TRUE(
-    matches("class X { void x(int, int = 0) { int y; x(y); } };", Arg));
+      matches("class X { void x(int, int = 0) { int y; x(y); } };", Arg));
   EXPECT_TRUE(notMatches("void x(int, int = 0) { int y; x(y, 0); }", Arg));
 }
 
@@ -951,7 +925,7 @@ TEST_P(ASTMatchersTest, IntegerLiteral) {
 
   // Non-matching cases (character literals, float and double)
   EXPECT_TRUE(notMatches("int i = L'a';",
-                         HasIntLiteral));  // this is actually a character
+                         HasIntLiteral)); // this is actually a character
   // literal cast to int
   EXPECT_TRUE(notMatches("int i = 'a';", HasIntLiteral));
   EXPECT_TRUE(notMatches("int i = 1e10;", HasIntLiteral));
@@ -974,13 +948,13 @@ TEST_P(ASTMatchersTest, FloatLiteral) {
   EXPECT_TRUE(matches("double i = 5.0;", floatLiteral(equals(5.0))));
   EXPECT_TRUE(matches("double i = 5.0;", floatLiteral(equals(5.0f))));
   EXPECT_TRUE(
-    matches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(5.0)))));
+      matches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(5.0)))));
 
   EXPECT_TRUE(notMatches("float i = 10;", HasFloatLiteral));
   EXPECT_TRUE(notMatches("double i = 5.0;", floatLiteral(equals(6.0))));
   EXPECT_TRUE(notMatches("double i = 5.0;", floatLiteral(equals(6.0f))));
   EXPECT_TRUE(
-    notMatches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(6.0)))));
+      notMatches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(6.0)))));
 }
 
 TEST_P(ASTMatchersTest, CXXNullPtrLiteralExpr) {
@@ -1051,9 +1025,9 @@ TEST_P(ASTMatchersTest, ParenListExpr) {
     return;
   }
   EXPECT_TRUE(
-    matches("template class foo { void bar() { foo X(*this); } };"
+      matches("template class foo { void bar() { foo X(*this); } };"
               "template class foo;",
-            varDecl(hasInitializer(parenListExpr(has(unaryOperator()))))));
+              varDecl(hasInitializer(parenListExpr(has(unaryOperator()))))));
 }
 
 TEST_P(ASTMatchersTest, StmtExpr) {
@@ -1064,9 +1038,8 @@ TEST_P(ASTMatchersTest, StmtExpr) {
 TEST_P(ASTMatchersTest, PredefinedExpr) {
   // __func__ expands as StringLiteral("foo")
   EXPECT_TRUE(matches("void foo() { __func__; }",
-                      predefinedExpr(
-                        hasType(asString("const char [4]")),
-                        has(stringLiteral()))));
+                      predefinedExpr(hasType(asString("const char [4]")),
+                                     has(stringLiteral()))));
 }
 
 TEST_P(ASTMatchersTest, AsmStatement) {
@@ -1080,7 +1053,7 @@ TEST_P(ASTMatchersTest, HasCondition) {
   }
 
   StatementMatcher Condition =
-    ifStmt(hasCondition(cxxBoolLiteral(equals(true))));
+      ifStmt(hasCondition(cxxBoolLiteral(equals(true))));
 
   EXPECT_TRUE(matches("void x() { if (true) {} }", Condition));
   EXPECT_TRUE(notMatches("void x() { if (false) {} }", Condition));
@@ -1096,24 +1069,24 @@ TEST_P(ASTMatchersTest, ConditionalOperator) {
     return;
   }
 
-  StatementMatcher Conditional = conditionalOperator(
-    hasCondition(cxxBoolLiteral(equals(true))),
-    hasTrueExpression(cxxBoolLiteral(equals(false))));
+  StatementMatcher Conditional =
+      conditionalOperator(hasCondition(cxxBoolLiteral(equals(true))),
+                          hasTrueExpression(cxxBoolLiteral(equals(false))));
 
   EXPECT_TRUE(matches("void x() { true ? false : true; }", Conditional));
   EXPECT_TRUE(notMatches("void x() { false ? false : true; }", Conditional));
   EXPECT_TRUE(notMatches("void x() { true ? true : false; }", Conditional));
 
-  StatementMatcher ConditionalFalse = conditionalOperator(
-    hasFalseExpression(cxxBoolLiteral(equals(false))));
+  StatementMatcher ConditionalFalse =
+      conditionalOperator(hasFalseExpression(cxxBoolLiteral(equals(false))));
 
   EXPECT_TRUE(matches("void x() { true ? true : false; }", ConditionalFalse));
   EXPECT_TRUE(
-    notMatches("void x() { true ? false : true; }", ConditionalFalse));
+      notMatches("void x() { true ? false : true; }", ConditionalFalse));
 
   EXPECT_TRUE(matches("void x() { true ? true : false; }", ConditionalFalse));
   EXPECT_TRUE(
-    notMatches("void x() { true ? false : true; }", ConditionalFalse));
+      notMatches("void x() { true ? false : true; }", ConditionalFalse));
 }
 
 TEST_P(ASTMatchersTest, BinaryConditionalOperator) {
@@ -1132,18 +1105,17 @@ TEST_P(ASTMatchersTest, BinaryConditionalOperator) {
   EXPECT_TRUE(matches("void x() { 1 ?: 0; }", AlwaysOne));
 
   StatementMatcher FourNotFive = binaryConditionalOperator(
-    hasTrueExpression(opaqueValueExpr(
-      hasSourceExpression((integerLiteral(equals(4)))))),
-    hasFalseExpression(integerLiteral(equals(5))));
+      hasTrueExpression(
+          opaqueValueExpr(hasSourceExpression((integerLiteral(equals(4)))))),
+      hasFalseExpression(integerLiteral(equals(5))));
 
   EXPECT_TRUE(matches("void x() { 4 ?: 5; }", FourNotFive));
 }
 
 TEST_P(ASTMatchersTest, ArraySubscriptExpr) {
-  EXPECT_TRUE(matches("int i[2]; void f() { i[1] = 1; }",
-                      arraySubscriptExpr()));
-  EXPECT_TRUE(notMatches("int i; void f() { i = 1; }",
-                         arraySubscriptExpr()));
+  EXPECT_TRUE(
+      matches("int i[2]; void f() { i[1] = 1; }", arraySubscriptExpr()));
+  EXPECT_TRUE(notMatches("int i; void f() { i = 1; }", arraySubscriptExpr()));
 }
 
 TEST_P(ASTMatchersTest, ForStmt) {
@@ -1178,10 +1150,9 @@ TEST_P(ASTMatchersTest, CompoundStatement_DoesNotMatchEmptyStruct) {
   }
   // It's not a compound statement just because there's "{}" in the source
   // text. This is an AST search, not grep.
-  EXPECT_TRUE(notMatches("namespace n { struct S {}; }",
-                         compoundStmt()));
-  EXPECT_TRUE(matches("namespace n { struct S { void f() {{}} }; }",
-                      compoundStmt()));
+  EXPECT_TRUE(notMatches("namespace n { struct S {}; }", compoundStmt()));
+  EXPECT_TRUE(
+      matches("namespace n { struct S { void f() {{}} }; }", compoundStmt()));
 }
 
 TEST_P(ASTMatchersTest, CastExpr_MatchesExplicitCasts) {
@@ -1242,8 +1213,8 @@ TEST_P(ASTMatchersTest, CXXReinterpretCastExpr_DoesNotMatchOtherCasts) {
   EXPECT_TRUE(notMatches("void* p = static_cast(&p);",
                          cxxReinterpretCastExpr()));
   EXPECT_TRUE(notMatches("struct B { virtual ~B() {} }; struct D : B {};"
-                           "B b;"
-                           "D* p = dynamic_cast(&b);",
+                         "B b;"
+                         "D* p = dynamic_cast(&b);",
                          cxxReinterpretCastExpr()));
 }
 
@@ -1262,11 +1233,10 @@ TEST_P(ASTMatchersTest, CXXFunctionalCastExpr_DoesNotMatchOtherCasts) {
   }
   StringRef FooClass = "class Foo { public: Foo(const char*); };";
   EXPECT_TRUE(
-    notMatches(FooClass + "void r() { Foo f = (Foo) \"hello world\"; }",
-               cxxFunctionalCastExpr()));
-  EXPECT_TRUE(
-    notMatches(FooClass + "void r() { Foo f = \"hello world\"; }",
-               cxxFunctionalCastExpr()));
+      notMatches(FooClass + "void r() { Foo f = (Foo) \"hello world\"; }",
+                 cxxFunctionalCastExpr()));
+  EXPECT_TRUE(notMatches(FooClass + "void r() { Foo f = \"hello world\"; }",
+                         cxxFunctionalCastExpr()));
 }
 
 TEST_P(ASTMatchersTest, CXXDynamicCastExpr) {
@@ -1274,8 +1244,8 @@ TEST_P(ASTMatchersTest, CXXDynamicCastExpr) {
     return;
   }
   EXPECT_TRUE(matches("struct B { virtual ~B() {} }; struct D : B {};"
-                        "B b;"
-                        "D* p = dynamic_cast(&b);",
+                      "B b;"
+                      "D* p = dynamic_cast(&b);",
                       cxxDynamicCastExpr()));
 }
 
@@ -1283,8 +1253,7 @@ TEST_P(ASTMatchersTest, CXXStaticCastExpr_MatchesSimpleCase) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("void* p(static_cast(&p));",
-                      cxxStaticCastExpr()));
+  EXPECT_TRUE(matches("void* p(static_cast(&p));", cxxStaticCastExpr()));
 }
 
 TEST_P(ASTMatchersTest, CXXStaticCastExpr_DoesNotMatchOtherCasts) {
@@ -1292,13 +1261,13 @@ TEST_P(ASTMatchersTest, CXXStaticCastExpr_DoesNotMatchOtherCasts) {
     return;
   }
   EXPECT_TRUE(notMatches("char* p = (char*)(&p);", cxxStaticCastExpr()));
-  EXPECT_TRUE(notMatches("char q, *p = const_cast(&q);",
-                         cxxStaticCastExpr()));
+  EXPECT_TRUE(
+      notMatches("char q, *p = const_cast(&q);", cxxStaticCastExpr()));
   EXPECT_TRUE(notMatches("void* p = reinterpret_cast(&p);",
                          cxxStaticCastExpr()));
   EXPECT_TRUE(notMatches("struct B { virtual ~B() {} }; struct D : B {};"
-                           "B b;"
-                           "D* p = dynamic_cast(&b);",
+                         "B b;"
+                         "D* p = dynamic_cast(&b);",
                          cxxStaticCastExpr()));
 }
 
@@ -1311,11 +1280,11 @@ TEST_P(ASTMatchersTest, CStyleCastExpr_DoesNotMatchOtherCasts) {
     return;
   }
   EXPECT_TRUE(notMatches("char* p = static_cast(0);"
-                           "char q, *r = const_cast(&q);"
-                           "void* s = reinterpret_cast(&s);"
-                           "struct B { virtual ~B() {} }; struct D : B {};"
-                           "B b;"
-                           "D* t = dynamic_cast(&b);",
+                         "char q, *r = const_cast(&q);"
+                         "void* s = reinterpret_cast(&s);"
+                         "struct B { virtual ~B() {} }; struct D : B {};"
+                         "B b;"
+                         "D* t = dynamic_cast(&b);",
                          cStyleCastExpr()));
 }
 
@@ -1335,12 +1304,12 @@ TEST_P(ASTMatchersTest, ImplicitCastExpr_MatchesSimpleCase) {
 }
 
 TEST_P(ASTMatchersTest, ImplicitCastExpr_DoesNotMatchIncorrectly) {
-  // This test verifies that implicitCastExpr() matches exactly when implicit casts
-  // are present, and that it ignores explicit and paren casts.
+  // This test verifies that implicitCastExpr() matches exactly when implicit
+  // casts are present, and that it ignores explicit and paren casts.
 
   // These two test cases have no casts.
-  EXPECT_TRUE(notMatches("int x = 0;",
-                         varDecl(hasInitializer(implicitCastExpr()))));
+  EXPECT_TRUE(
+      notMatches("int x = 0;", varDecl(hasInitializer(implicitCastExpr()))));
   EXPECT_TRUE(
       notMatches("int x = (0);", varDecl(hasInitializer(implicitCastExpr()))));
   EXPECT_TRUE(notMatches("void f() { int x = 0; double d = (double) x; }",
@@ -1393,7 +1362,7 @@ TEST_P(ASTMatchersTest, InitListExpr) {
   EXPECT_TRUE(matches("struct B { int x, y; }; struct B b = { 5, 6 };",
                       initListExpr(hasType(recordDecl(hasName("B"))))));
   EXPECT_TRUE(
-    matches("int i[1] = {42, [0] = 43};", integerLiteral(equals(42))));
+      matches("int i[1] = {42, [0] = 43};", integerLiteral(equals(42))));
 }
 
 TEST_P(ASTMatchersTest, InitListExpr_CXX) {
@@ -1441,8 +1410,7 @@ TEST_P(ASTMatchersTest, UsingDecl_MatchesUsingDeclarations) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("namespace X { int x; } using X::x;",
-                      usingDecl()));
+  EXPECT_TRUE(matches("namespace X { int x; } using X::x;", usingDecl()));
 }
 
 TEST_P(ASTMatchersTest, UsingDecl_MatchesShadowUsingDelcarations) {
@@ -1460,7 +1428,7 @@ TEST_P(ASTMatchersTest, UsingDirectiveDecl_MatchesUsingNamespace) {
   EXPECT_TRUE(matches("namespace X { int x; } using namespace X;",
                       usingDirectiveDecl()));
   EXPECT_FALSE(
-    matches("namespace X { int x; } using X::x;", usingDirectiveDecl()));
+      matches("namespace X { int x; } using X::x;", usingDirectiveDecl()));
 }
 
 TEST_P(ASTMatchersTest, WhileStmt) {
@@ -1499,11 +1467,11 @@ TEST_P(ASTMatchersTest, CxxExceptionHandling_SimpleCases) {
   EXPECT_TRUE(matches("void foo() try { } catch(int X) { }", cxxCatchStmt()));
   EXPECT_TRUE(matches("void foo() try { } catch(int X) { }", cxxTryStmt()));
   EXPECT_TRUE(
-    notMatches("void foo() try { } catch(int X) { }", cxxThrowExpr()));
-  EXPECT_TRUE(matches("void foo() try { throw; } catch(int X) { }",
-                      cxxThrowExpr()));
-  EXPECT_TRUE(matches("void foo() try { throw 5;} catch(int X) { }",
-                      cxxThrowExpr()));
+      notMatches("void foo() try { } catch(int X) { }", cxxThrowExpr()));
+  EXPECT_TRUE(
+      matches("void foo() try { throw; } catch(int X) { }", cxxThrowExpr()));
+  EXPECT_TRUE(
+      matches("void foo() try { throw 5;} catch(int X) { }", cxxThrowExpr()));
   EXPECT_TRUE(matches("void foo() try { throw; } catch(...) { }",
                       cxxCatchStmt(isCatchAll())));
   EXPECT_TRUE(notMatches("void foo() try { throw; } catch(int) { }",
@@ -1542,9 +1510,8 @@ TEST_P(ASTMatchersTest, QualType) {
 
 TEST_P(ASTMatchersTest, ConstantArrayType) {
   EXPECT_TRUE(matches("int a[2];", constantArrayType()));
-  EXPECT_TRUE(notMatches(
-    "void f() { int a[] = { 2, 3 }; int b[a[0]]; }",
-    constantArrayType(hasElementType(builtinType()))));
+  EXPECT_TRUE(notMatches("void f() { int a[] = { 2, 3 }; int b[a[0]]; }",
+                         constantArrayType(hasElementType(builtinType()))));
 
   EXPECT_TRUE(matches("int a[42];", constantArrayType(hasSize(42))));
   EXPECT_TRUE(matches("int b[2*21];", constantArrayType(hasSize(42))));
@@ -1555,12 +1522,12 @@ TEST_P(ASTMatchersTest, DependentSizedArrayType) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches(
-    "template  class array { T data[Size]; };",
-    dependentSizedArrayType()));
-  EXPECT_TRUE(notMatches(
-    "int a[42]; int b[] = { 2, 3 }; void f() { int c[b[0]]; }",
-    dependentSizedArrayType()));
+  EXPECT_TRUE(
+      matches("template  class array { T data[Size]; };",
+              dependentSizedArrayType()));
+  EXPECT_TRUE(
+      notMatches("int a[42]; int b[] = { 2, 3 }; void f() { int c[b[0]]; }",
+                 dependentSizedArrayType()));
 }
 
 TEST_P(ASTMatchersTest, IncompleteArrayType) {
@@ -1575,22 +1542,21 @@ TEST_P(ASTMatchersTest, VariableArrayType) {
   EXPECT_TRUE(matches("void f(int b) { int a[b]; }", variableArrayType()));
   EXPECT_TRUE(notMatches("int a[] = {2, 3}; int b[42];", variableArrayType()));
 
-  EXPECT_TRUE(matches(
-    "void f(int b) { int a[b]; }",
-    variableArrayType(hasSizeExpr(ignoringImpCasts(declRefExpr(to(
-      varDecl(hasName("b")))))))));
+  EXPECT_TRUE(matches("void f(int b) { int a[b]; }",
+                      variableArrayType(hasSizeExpr(ignoringImpCasts(
+                          declRefExpr(to(varDecl(hasName("b")))))))));
 }
 
 TEST_P(ASTMatchersTest, AtomicType) {
   if (llvm::Triple(llvm::sys::getDefaultTargetTriple()).getOS() !=
-    llvm::Triple::Win32) {
+      llvm::Triple::Win32) {
     // FIXME: Make this work for MSVC.
     EXPECT_TRUE(matches("_Atomic(int) i;", atomicType()));
 
-    EXPECT_TRUE(matches("_Atomic(int) i;",
-                        atomicType(hasValueType(isInteger()))));
-    EXPECT_TRUE(notMatches("_Atomic(float) f;",
-                           atomicType(hasValueType(isInteger()))));
+    EXPECT_TRUE(
+        matches("_Atomic(int) i;", atomicType(hasValueType(isInteger()))));
+    EXPECT_TRUE(
+        notMatches("_Atomic(float) f;", atomicType(hasValueType(isInteger()))));
   }
 }
 
@@ -1608,9 +1574,9 @@ TEST_P(ASTMatchersTest, AutoType) {
 
   // FIXME: Matching against the type-as-written can't work here, because the
   //        type as written was not deduced.
-  //EXPECT_TRUE(matches("auto a = 1;",
+  // EXPECT_TRUE(matches("auto a = 1;",
   //                    autoType(hasDeducedType(isInteger()))));
-  //EXPECT_TRUE(notMatches("auto b = 2.0;",
+  // EXPECT_TRUE(notMatches("auto b = 2.0;",
   //                       autoType(hasDeducedType(isInteger()))));
 }
 
@@ -1657,48 +1623,43 @@ TEST_P(ASTMatchersTest, FunctionProtoType_CXX) {
 
 TEST_P(ASTMatchersTest, ParenType) {
   EXPECT_TRUE(
-    matches("int (*array)[4];", varDecl(hasType(pointsTo(parenType())))));
+      matches("int (*array)[4];", varDecl(hasType(pointsTo(parenType())))));
   EXPECT_TRUE(notMatches("int *array[4];", varDecl(hasType(parenType()))));
 
   EXPECT_TRUE(matches(
-    "int (*ptr_to_func)(int);",
-    varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
+      "int (*ptr_to_func)(int);",
+      varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
   EXPECT_TRUE(notMatches(
-    "int (*ptr_to_array)[4];",
-    varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
+      "int (*ptr_to_array)[4];",
+      varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
 }
 
 TEST_P(ASTMatchersTest, PointerType) {
   // FIXME: Reactive when these tests can be more specific (not matching
   // implicit code on certain platforms), likely when we have hasDescendant for
   // Types/TypeLocs.
-  //EXPECT_TRUE(matchAndVerifyResultTrue(
+  // EXPECT_TRUE(matchAndVerifyResultTrue(
   //    "int* a;",
   //    pointerTypeLoc(pointeeLoc(typeLoc().bind("loc"))),
   //    std::make_unique>("loc", 1)));
-  //EXPECT_TRUE(matchAndVerifyResultTrue(
+  // EXPECT_TRUE(matchAndVerifyResultTrue(
   //    "int* a;",
   //    pointerTypeLoc().bind("loc"),
   //    std::make_unique>("loc", 1)));
-  EXPECT_TRUE(matches(
-    "int** a;",
-    loc(pointerType(pointee(qualType())))));
-  EXPECT_TRUE(matches(
-    "int** a;",
-    loc(pointerType(pointee(pointerType())))));
-  EXPECT_TRUE(matches(
-    "int* b; int* * const a = &b;",
-    loc(qualType(isConstQualified(), pointerType()))));
+  EXPECT_TRUE(matches("int** a;", loc(pointerType(pointee(qualType())))));
+  EXPECT_TRUE(matches("int** a;", loc(pointerType(pointee(pointerType())))));
+  EXPECT_TRUE(matches("int* b; int* * const a = &b;",
+                      loc(qualType(isConstQualified(), pointerType()))));
 
   StringRef Fragment = "int *ptr;";
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("ptr"),
-                                           hasType(blockPointerType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("ptr"),
-                                           hasType(memberPointerType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("ptr"),
-                                        hasType(pointerType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("ptr"),
-                                           hasType(referenceType()))));
+  EXPECT_TRUE(notMatches(Fragment,
+                         varDecl(hasName("ptr"), hasType(blockPointerType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("ptr"), hasType(memberPointerType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("ptr"), hasType(pointerType()))));
+  EXPECT_TRUE(
+      notMatches(Fragment, varDecl(hasName("ptr"), hasType(referenceType()))));
 }
 
 TEST_P(ASTMatchersTest, PointerType_CXX) {
@@ -1763,28 +1724,28 @@ TEST_P(ASTMatchersTest, AutoRefTypes) {
                        "auto &c = a;"
                        "auto &&d = c;"
                        "auto &&e = 2;";
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("a"),
-                                           hasType(referenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("b"),
-                                           hasType(referenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("c"),
-                                        hasType(referenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("c"),
-                                        hasType(lValueReferenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("c"),
-                                           hasType(rValueReferenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("d"),
-                                        hasType(referenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("d"),
-                                        hasType(lValueReferenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("d"),
-                                           hasType(rValueReferenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("e"),
-                                        hasType(referenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("e"),
-                                           hasType(lValueReferenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("e"),
-                                        hasType(rValueReferenceType()))));
+  EXPECT_TRUE(
+      notMatches(Fragment, varDecl(hasName("a"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      notMatches(Fragment, varDecl(hasName("b"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("c"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("c"), hasType(lValueReferenceType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("c"), hasType(rValueReferenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("d"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("d"), hasType(lValueReferenceType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("d"), hasType(rValueReferenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("e"), hasType(referenceType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("e"), hasType(lValueReferenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("e"), hasType(rValueReferenceType()))));
 }
 
 TEST_P(ASTMatchersTest, EnumType) {
@@ -1796,34 +1757,29 @@ TEST_P(ASTMatchersTest, EnumType_CXX) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("enum Color { Green }; Color color;",
-                      loc(enumType())));
+  EXPECT_TRUE(matches("enum Color { Green }; Color color;", loc(enumType())));
 }
 
 TEST_P(ASTMatchersTest, EnumType_CXX11) {
   if (!GetParam().isCXX11OrLater()) {
     return;
   }
-  EXPECT_TRUE(matches("enum class Color { Green }; Color color;",
-                      loc(enumType())));
+  EXPECT_TRUE(
+      matches("enum class Color { Green }; Color color;", loc(enumType())));
 }
 
 TEST_P(ASTMatchersTest, PointerType_MatchesPointersToConstTypes) {
-  EXPECT_TRUE(matches("int b; int * const a = &b;",
-                      loc(pointerType())));
-  EXPECT_TRUE(matches("int b; int * const a = &b;",
-                      loc(pointerType())));
-  EXPECT_TRUE(matches(
-    "int b; const int * a = &b;",
-    loc(pointerType(pointee(builtinType())))));
-  EXPECT_TRUE(matches(
-    "int b; const int * a = &b;",
-    pointerType(pointee(builtinType()))));
+  EXPECT_TRUE(matches("int b; int * const a = &b;", loc(pointerType())));
+  EXPECT_TRUE(matches("int b; int * const a = &b;", loc(pointerType())));
+  EXPECT_TRUE(matches("int b; const int * a = &b;",
+                      loc(pointerType(pointee(builtinType())))));
+  EXPECT_TRUE(matches("int b; const int * a = &b;",
+                      pointerType(pointee(builtinType()))));
 }
 
 TEST_P(ASTMatchersTest, TypedefType) {
-  EXPECT_TRUE(matches("typedef int X; X a;", varDecl(hasName("a"),
-                                                     hasType(typedefType()))));
+  EXPECT_TRUE(matches("typedef int X; X a;",
+                      varDecl(hasName("a"), hasType(typedefType()))));
 }
 
 TEST_P(ASTMatchersTest, TemplateSpecializationType) {
@@ -1864,13 +1820,13 @@ TEST_P(ASTMatchersTest, ElaboratedType) {
     // FIXME: Add a test for `elaboratedType()` that does not depend on C++.
     return;
   }
-  EXPECT_TRUE(matches(
-    "namespace N {"
-      "  namespace M {"
-      "    class D {};"
-      "  }"
-      "}"
-      "N::M::D d;", elaboratedType()));
+  EXPECT_TRUE(matches("namespace N {"
+                      "  namespace M {"
+                      "    class D {};"
+                      "  }"
+                      "}"
+                      "N::M::D d;",
+                      elaboratedType()));
   EXPECT_TRUE(matches("class C {} c;", elaboratedType()));
   EXPECT_TRUE(notMatches("class C {}; C c;", elaboratedType()));
 }
@@ -1885,30 +1841,29 @@ TEST_P(ASTMatchersTest, SubstTemplateTypeParmType) {
                    "}"
                    "int i = F();";
   EXPECT_FALSE(matches(code, binaryOperator(hasLHS(
-    expr(hasType(substTemplateTypeParmType()))))));
+                                 expr(hasType(substTemplateTypeParmType()))))));
   EXPECT_TRUE(matches(code, binaryOperator(hasRHS(
-    expr(hasType(substTemplateTypeParmType()))))));
+                                expr(hasType(substTemplateTypeParmType()))))));
 }
 
 TEST_P(ASTMatchersTest, NestedNameSpecifier) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("namespace ns { struct A {}; } ns::A a;",
-                      nestedNameSpecifier()));
+  EXPECT_TRUE(
+      matches("namespace ns { struct A {}; } ns::A a;", nestedNameSpecifier()));
   EXPECT_TRUE(matches("template  class A { typename T::B b; };",
                       nestedNameSpecifier()));
-  EXPECT_TRUE(matches("struct A { void f(); }; void A::f() {}",
-                      nestedNameSpecifier()));
+  EXPECT_TRUE(
+      matches("struct A { void f(); }; void A::f() {}", nestedNameSpecifier()));
   EXPECT_TRUE(matches("namespace a { namespace b {} } namespace ab = a::b;",
                       nestedNameSpecifier()));
 
-  EXPECT_TRUE(matches(
-    "struct A { static void f() {} }; void g() { A::f(); }",
-    nestedNameSpecifier()));
-  EXPECT_TRUE(notMatches(
-    "struct A { static void f() {} }; void g(A* a) { a->f(); }",
-    nestedNameSpecifier()));
+  EXPECT_TRUE(matches("struct A { static void f() {} }; void g() { A::f(); }",
+                      nestedNameSpecifier()));
+  EXPECT_TRUE(
+      notMatches("struct A { static void f() {} }; void g(A* a) { a->f(); }",
+                 nestedNameSpecifier()));
 }
 
 TEST_P(ASTMatchersTest, NullStmt) {
@@ -1929,10 +1884,10 @@ TEST_P(ASTMatchersTest, NestedNameSpecifier_MatchesTypes) {
     return;
   }
   NestedNameSpecifierMatcher Matcher = nestedNameSpecifier(
-    specifiesType(hasDeclaration(recordDecl(hasName("A")))));
+      specifiesType(hasDeclaration(recordDecl(hasName("A")))));
   EXPECT_TRUE(matches("struct A { struct B {}; }; A::B b;", Matcher));
-  EXPECT_TRUE(matches("struct A { struct B { struct C {}; }; }; A::B::C c;",
-                      Matcher));
+  EXPECT_TRUE(
+      matches("struct A { struct B { struct C {}; }; }; A::B::C c;", Matcher));
   EXPECT_TRUE(notMatches("namespace A { struct B {}; } A::B b;", Matcher));
 }
 
@@ -1940,8 +1895,8 @@ TEST_P(ASTMatchersTest, NestedNameSpecifier_MatchesNamespaceDecls) {
   if (!GetParam().isCXX()) {
     return;
   }
-  NestedNameSpecifierMatcher Matcher = nestedNameSpecifier(
-    specifiesNamespace(hasName("ns")));
+  NestedNameSpecifierMatcher Matcher =
+      nestedNameSpecifier(specifiesNamespace(hasName("ns")));
   EXPECT_TRUE(matches("namespace ns { struct A {}; } ns::A a;", Matcher));
   EXPECT_TRUE(notMatches("namespace xx { struct A {}; } xx::A a;", Matcher));
   EXPECT_TRUE(notMatches("struct ns { struct A {}; }; ns::A a;", Matcher));
@@ -1953,16 +1908,15 @@ TEST_P(ASTMatchersTest,
     return;
   }
   EXPECT_TRUE(matches(
-    "struct A { struct B { struct C {}; }; }; A::B::C c;",
-    nestedNameSpecifier(hasPrefix(specifiesType(asString("struct A"))))));
-  EXPECT_TRUE(matches(
-    "struct A { struct B { struct C {}; }; }; A::B::C c;",
-    nestedNameSpecifierLoc(hasPrefix(
-      specifiesTypeLoc(loc(qualType(asString("struct A"))))))));
+      "struct A { struct B { struct C {}; }; }; A::B::C c;",
+      nestedNameSpecifier(hasPrefix(specifiesType(asString("struct A"))))));
+  EXPECT_TRUE(matches("struct A { struct B { struct C {}; }; }; A::B::C c;",
+                      nestedNameSpecifierLoc(hasPrefix(specifiesTypeLoc(
+                          loc(qualType(asString("struct A"))))))));
   EXPECT_TRUE(matches(
-    "namespace N { struct A { struct B { struct C {}; }; }; } N::A::B::C c;",
-    nestedNameSpecifierLoc(hasPrefix(
-      specifiesTypeLoc(loc(qualType(asString("struct N::A"))))))));
+      "namespace N { struct A { struct B { struct C {}; }; }; } N::A::B::C c;",
+      nestedNameSpecifierLoc(hasPrefix(
+          specifiesTypeLoc(loc(qualType(asString("struct N::A"))))))));
 }
 
 template 
@@ -1980,18 +1934,18 @@ class VerifyAncestorHasChildIsEqual : public BoundNodesCallback {
     // to equalsNode.
     const T *TypedNode = cast(Node);
     return selectFirst(
-      "", match(stmt(hasParent(
-        stmt(has(stmt(equalsNode(TypedNode)))).bind(""))),
-                *Node, Context)) != nullptr;
+               "", match(stmt(hasParent(
+                             stmt(has(stmt(equalsNode(TypedNode)))).bind(""))),
+                         *Node, Context)) != nullptr;
   }
   bool verify(const BoundNodes &Nodes, ASTContext &Context, const Decl *Node) {
     // Use the original typed pointer to verify we can pass pointers to subtypes
     // to equalsNode.
     const T *TypedNode = cast(Node);
     return selectFirst(
-      "", match(decl(hasParent(
-        decl(has(decl(equalsNode(TypedNode)))).bind(""))),
-                *Node, Context)) != nullptr;
+               "", match(decl(hasParent(
+                             decl(has(decl(equalsNode(TypedNode)))).bind(""))),
+                         *Node, Context)) != nullptr;
   }
   bool verify(const BoundNodes &Nodes, ASTContext &Context, const Type *Node) {
     // Use the original typed pointer to verify we can pass pointers to subtypes
@@ -1999,9 +1953,9 @@ class VerifyAncestorHasChildIsEqual : public BoundNodesCallback {
     const T *TypedNode = cast(Node);
     const auto *Dec = Nodes.getNodeAs("decl");
     return selectFirst(
-      "", match(fieldDecl(hasParent(decl(has(fieldDecl(
-        hasType(type(equalsNode(TypedNode)).bind(""))))))),
-                *Dec, Context)) != nullptr;
+               "", match(fieldDecl(hasParent(decl(has(fieldDecl(
+                             hasType(type(equalsNode(TypedNode)).bind(""))))))),
+                         *Dec, Context)) != nullptr;
   }
 };
 
@@ -2100,43 +2054,31 @@ TEST(ASTMatchersTestObjC, ObjCMessageExpr) {
                           "  Str *up = [text uppercaseString];"
                           "} "
                           "@end ";
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(anything())));
+  EXPECT_TRUE(matchesObjC(Objc1String, objcMessageExpr(anything())));
   EXPECT_TRUE(matchesObjC(Objc1String,
-                          objcMessageExpr(hasAnySelector({
-                                          "contents", "meth:"}))
+                          objcMessageExpr(hasAnySelector({"contents", "meth:"}))
 
-                         ));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"))));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasAnySelector("contents", "contentsA"))));
-  EXPECT_FALSE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasAnySelector("contentsB", "contentsC"))));
+                              ));
+  EXPECT_TRUE(
+      matchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"))));
   EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(matchesSelector("cont*"))));
+      Objc1String, objcMessageExpr(hasAnySelector("contents", "contentsA"))));
   EXPECT_FALSE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(matchesSelector("?cont*"))));
-  EXPECT_TRUE(notMatchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"), hasNullSelector())));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"), hasUnarySelector())));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"), numSelectorArgs(0))));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(matchesSelector("uppercase*"),
-                    argumentCountIs(0)
-    )));
+      Objc1String, objcMessageExpr(hasAnySelector("contentsB", "contentsC"))));
+  EXPECT_TRUE(
+      matchesObjC(Objc1String, objcMessageExpr(matchesSelector("cont*"))));
+  EXPECT_FALSE(
+      matchesObjC(Objc1String, objcMessageExpr(matchesSelector("?cont*"))));
+  EXPECT_TRUE(
+      notMatchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"),
+                                                  hasNullSelector())));
+  EXPECT_TRUE(matchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"),
+                                                       hasUnarySelector())));
+  EXPECT_TRUE(matchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"),
+                                                       numSelectorArgs(0))));
+  EXPECT_TRUE(
+      matchesObjC(Objc1String, objcMessageExpr(matchesSelector("uppercase*"),
+                                               argumentCountIs(0))));
 }
 
 TEST(ASTMatchersTestObjC, ObjCDecls) {
@@ -2157,33 +2099,17 @@ TEST(ASTMatchersTestObjC, ObjCDecls) {
                          "- (void)abc_doThing {} "
                          "@end ";
 
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcProtocolDecl(hasName("Proto"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcImplementationDecl(hasName("Thing"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcCategoryDecl(hasName("ABC"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcCategoryImplDecl(hasName("ABC"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcMethodDecl(hasName("protoDidThing"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcMethodDecl(hasName("abc_doThing"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcMethodDecl(hasName("anything"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcIvarDecl(hasName("_ivar"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcPropertyDecl(hasName("enabled"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcProtocolDecl(hasName("Proto"))));
+  EXPECT_TRUE(
+      matchesObjC(ObjCString, objcImplementationDecl(hasName("Thing"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcCategoryDecl(hasName("ABC"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcCategoryImplDecl(hasName("ABC"))));
+  EXPECT_TRUE(
+      matchesObjC(ObjCString, objcMethodDecl(hasName("protoDidThing"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcMethodDecl(hasName("abc_doThing"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcMethodDecl(hasName("anything"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcIvarDecl(hasName("_ivar"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcPropertyDecl(hasName("enabled"))));
 }
 
 TEST(ASTMatchersTestObjC, ObjCExceptionStmts) {
@@ -2194,18 +2120,10 @@ TEST(ASTMatchersTestObjC, ObjCExceptionStmts) {
                          "  } @finally {}"
                          "}";
 
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcTryStmt()));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcThrowStmt()));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcCatchStmt()));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcFinallyStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcTryStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcThrowStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcCatchStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcFinallyStmt()));
 }
 
 TEST(ASTMatchersTestObjC, ObjCAutoreleasePoolStmt) {
@@ -2274,11 +2192,18 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source3, Matcher));
 
   StringRef Source4 = R"(
+void x() {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
 void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
 }
 
 TEST(ASTMatchersTest, Finder_DynamicOnlyAcceptsSomeMatchers) {
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h
index 8669ebd552c83..af248906bf658 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h
@@ -20,10 +20,10 @@ namespace clang {
 namespace ast_matchers {
 
 using clang::tooling::buildASTFromCodeWithArgs;
+using clang::tooling::FileContentMappings;
+using clang::tooling::FrontendActionFactory;
 using clang::tooling::newFrontendActionFactory;
 using clang::tooling::runToolOnCodeWithArgs;
-using clang::tooling::FrontendActionFactory;
-using clang::tooling::FileContentMappings;
 
 class BoundNodesCallback {
 public:
@@ -38,7 +38,8 @@ class BoundNodesCallback {
 // If 'FindResultVerifier' is NULL, sets *Verified to true when Run is called.
 class VerifyMatch : public MatchFinder::MatchCallback {
 public:
-  VerifyMatch(std::unique_ptr FindResultVerifier, bool *Verified)
+  VerifyMatch(std::unique_ptr FindResultVerifier,
+              bool *Verified)
       : Verified(Verified), FindResultReviewer(std::move(FindResultVerifier)) {}
 
   void run(const MatchFinder::MatchResult &Result) override {
@@ -124,17 +125,16 @@ testing::AssertionResult matchesConditionally(
     return testing::AssertionFailure() << "Parsing error in \"" << Code << "\"";
   }
   if (Found != DynamicFound) {
-    return testing::AssertionFailure() << "Dynamic match result ("
-                                       << DynamicFound
-                                       << ") does not match static result ("
-                                       << Found << ")";
+    return testing::AssertionFailure()
+           << "Dynamic match result (" << DynamicFound
+           << ") does not match static result (" << Found << ")";
   }
   if (!Found && ExpectMatch) {
     return testing::AssertionFailure()
-      << "Could not find match in \"" << Code << "\"";
+           << "Could not find match in \"" << Code << "\"";
   } else if (Found && !ExpectMatch) {
     return testing::AssertionFailure()
-      << "Found unexpected match in \"" << Code << "\"";
+           << "Found unexpected match in \"" << Code << "\"";
   }
   return testing::AssertionSuccess();
 }
@@ -183,11 +183,6 @@ testing::AssertionResult matchesC(const Twine &Code, const T &AMatcher) {
                               "input.c");
 }
 
-template 
-testing::AssertionResult notMatchesC(const Twine &Code, const T &AMatcher) {
-  return matchesConditionally(Code, AMatcher, false, {Lang_C89});
-}
-
 template 
 testing::AssertionResult notMatchesObjC(const Twine &Code, const T &AMatcher) {
   return matchesObjC(Code, AMatcher, false);
@@ -216,7 +211,8 @@ matchesConditionallyWithCuda(const Twine &Code, const T &AMatcher,
       "                      size_t sharedSize = 0,"
       "                      cudaStream_t stream = 0);"
       "extern \"C\" unsigned __cudaPushCallConfiguration("
-      "    dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0);";
+      "    dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = "
+      "0);";
 
   bool Found = false, DynamicFound = false;
   MatchFinder Finder;
@@ -233,22 +229,20 @@ matchesConditionallyWithCuda(const Twine &Code, const T &AMatcher,
   std::vector Args = {
       "-xcuda",  "-fno-ms-extensions",     "--cuda-host-only",     "-nocudainc",
       "-target", "x86_64-unknown-unknown", std::string(CompileArg)};
-  if (!runToolOnCodeWithArgs(Factory->create(),
-                             CudaHeader + Code, Args)) {
+  if (!runToolOnCodeWithArgs(Factory->create(), CudaHeader + Code, Args)) {
     return testing::AssertionFailure() << "Parsing error in \"" << Code << "\"";
   }
   if (Found != DynamicFound) {
-    return testing::AssertionFailure() << "Dynamic match result ("
-                                       << DynamicFound
-                                       << ") does not match static result ("
-                                       << Found << ")";
+    return testing::AssertionFailure()
+           << "Dynamic match result (" << DynamicFound
+           << ") does not match static result (" << Found << ")";
   }
   if (!Found && ExpectMatch) {
     return testing::AssertionFailure()
-      << "Could not find match in \"" << Code << "\"";
+           << "Could not find match in \"" << Code << "\"";
   } else if (Found && !ExpectMatch) {
     return testing::AssertionFailure()
-      << "Found unexpected match in \"" << Code << "\"";
+           << "Found unexpected match in \"" << Code << "\"";
   }
   return testing::AssertionSuccess();
 }
@@ -276,13 +270,28 @@ testing::AssertionResult notMatchesWithOpenMP(const Twine &Code,
   return matchesConditionally(Code, AMatcher, false, {"-fopenmp=libomp"});
 }
 
+template 
+testing::AssertionResult matchesWithOpenMP51(const Twine &Code,
+                                             const T &AMatcher) {
+  return matchesConditionally(Code, AMatcher, true,
+                              {"-fopenmp=libomp", "-fopenmp-version=51"});
+}
+
+template 
+testing::AssertionResult notMatchesWithOpenMP51(const Twine &Code,
+                                                const T &AMatcher) {
+  return matchesConditionally(Code, AMatcher, false,
+                              {"-fopenmp=libomp", "-fopenmp-version=51"});
+}
+
 template 
 testing::AssertionResult matchAndVerifyResultConditionally(
     const Twine &Code, const T &AMatcher,
     std::unique_ptr FindResultVerifier, bool ExpectResult) {
   bool VerifiedResult = false;
   MatchFinder Finder;
-  VerifyMatch VerifyVerifiedResult(std::move(FindResultVerifier), &VerifiedResult);
+  VerifyMatch VerifyVerifiedResult(std::move(FindResultVerifier),
+                                   &VerifiedResult);
   Finder.addMatcher(AMatcher, &VerifyVerifiedResult);
   std::unique_ptr Factory(
       newFrontendActionFactory(&Finder));
@@ -296,10 +305,10 @@ testing::AssertionResult matchAndVerifyResultConditionally(
   }
   if (!VerifiedResult && ExpectResult) {
     return testing::AssertionFailure()
-      << "Could not verify result in \"" << Code << "\"";
+           << "Could not verify result in \"" << Code << "\"";
   } else if (VerifiedResult && !ExpectResult) {
     return testing::AssertionFailure()
-      << "Verified unexpected result in \"" << Code << "\"";
+           << "Verified unexpected result in \"" << Code << "\"";
   }
 
   VerifiedResult = false;
@@ -307,15 +316,15 @@ testing::AssertionResult matchAndVerifyResultConditionally(
   std::unique_ptr AST(
       buildASTFromCodeWithArgs(Code.toStringRef(Buffer), Args));
   if (!AST.get())
-    return testing::AssertionFailure() << "Parsing error in \"" << Code
-                                       << "\" while building AST";
+    return testing::AssertionFailure()
+           << "Parsing error in \"" << Code << "\" while building AST";
   Finder.matchAST(AST->getASTContext());
   if (!VerifiedResult && ExpectResult) {
     return testing::AssertionFailure()
-      << "Could not verify result in \"" << Code << "\" with AST";
+           << "Could not verify result in \"" << Code << "\" with AST";
   } else if (VerifiedResult && !ExpectResult) {
     return testing::AssertionFailure()
-      << "Verified unexpected result in \"" << Code << "\" with AST";
+           << "Verified unexpected result in \"" << Code << "\" with AST";
   }
 
   return testing::AssertionSuccess();
@@ -327,8 +336,8 @@ template 
 testing::AssertionResult matchAndVerifyResultTrue(
     const Twine &Code, const T &AMatcher,
     std::unique_ptr FindResultVerifier) {
-  return matchAndVerifyResultConditionally(
-      Code, AMatcher, std::move(FindResultVerifier), true);
+  return matchAndVerifyResultConditionally(Code, AMatcher,
+                                           std::move(FindResultVerifier), true);
 }
 
 template 
@@ -342,8 +351,7 @@ testing::AssertionResult matchAndVerifyResultFalse(
 // Implements a run method that returns whether BoundNodes contains a
 // Decl bound to Id that can be dynamically cast to T.
 // Optionally checks that the check succeeded a specific number of times.
-template 
-class VerifyIdIsBoundTo : public BoundNodesCallback {
+template  class VerifyIdIsBoundTo : public BoundNodesCallback {
 public:
   // Create an object that checks that a node of type \c T was bound to \c Id.
   // Does not check for a certain number of matches.
@@ -386,7 +394,7 @@ class VerifyIdIsBoundTo : public BoundNodesCallback {
       if (const NamedDecl *Named = Nodes->getNodeAs(Id)) {
         Name = Named->getNameAsString();
       } else if (const NestedNameSpecifier *NNS =
-        Nodes->getNodeAs(Id)) {
+                     Nodes->getNodeAs(Id)) {
         llvm::raw_string_ostream OS(Name);
         NNS->print(OS, PrintingPolicy(LangOptions()));
       }
@@ -398,7 +406,7 @@ class VerifyIdIsBoundTo : public BoundNodesCallback {
       return true;
     }
     EXPECT_TRUE(M.count(Id) == 0 ||
-      M.find(Id)->second.template get() == nullptr);
+                M.find(Id)->second.template get() == nullptr);
     return false;
   }
 
@@ -437,4 +445,4 @@ class ASTMatchersTest : public ::testing::Test,
 } // namespace ast_matchers
 } // namespace clang
 
-#endif  // LLVM_CLANG_UNITTESTS_AST_MATCHERS_AST_MATCHERS_TEST_H
+#endif // LLVM_CLANG_UNITTESTS_AST_MATCHERS_AST_MATCHERS_TEST_H
diff --git a/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp b/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp
index 5495f27f5b32a..4e6fbeee86a3f 100644
--- a/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp
+++ b/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp
@@ -29,7 +29,7 @@ class CTUASTConsumer : public clang::ASTConsumer {
   explicit CTUASTConsumer(clang::CompilerInstance &CI, bool *Success)
       : CTU(CI), Success(Success) {}
 
-  void HandleTranslationUnit(ASTContext &Ctx) {
+  void HandleTranslationUnit(ASTContext &Ctx) override {
     auto FindFInTU = [](const TranslationUnitDecl *TU) {
       const FunctionDecl *FD = nullptr;
       for (const Decl *D : TU->decls()) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index ff9a64e81d5b5..6ac3ffbffd1c8 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -5325,7 +5325,7 @@ TEST_F(FormatTest, DeductionGuides) {
   verifyFormat("template  S(Ts...) -> S;");
   verifyFormat(
       "template \n"
-      "array(T &&... t) -> array, sizeof...(T)>;");
+      "array(T &&...t) -> array, sizeof...(T)>;");
   verifyFormat("template  A() -> Afoo<3>())>;");
   verifyFormat("template  A() -> A>)>;");
   verifyFormat("template  A() -> Afoo<1>)>;");
@@ -8179,13 +8179,20 @@ TEST_F(FormatTest, AttributePenaltyBreaking) {
 }
 
 TEST_F(FormatTest, UnderstandsEllipsis) {
+  FormatStyle Style = getLLVMStyle();
   verifyFormat("int printf(const char *fmt, ...);");
   verifyFormat("template  void Foo(Ts... ts) { Foo(ts...); }");
-  verifyFormat("template  void Foo(Ts *... ts) {}");
+  verifyFormat("template  void Foo(Ts *...ts) {}");
+
+  verifyFormat("template  a;", Style);
+
+  Style.PointerAlignment = FormatStyle::PAS_Left;
+  verifyFormat("template  void Foo(Ts*... ts) {}", Style);
+
+  verifyFormat("template  a;", Style);
 
-  FormatStyle PointersLeft = getLLVMStyle();
-  PointersLeft.PointerAlignment = FormatStyle::PAS_Left;
-  verifyFormat("template  void Foo(Ts*... ts) {}", PointersLeft);
+  Style.PointerAlignment = FormatStyle::PAS_Middle;
+  verifyFormat("template  a;", Style);
 }
 
 TEST_F(FormatTest, AdaptivelyFormatsPointersAndReferences) {
diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
index cc948b800f4e7..3bfb0ec1f7d56 100644
--- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@ TEST_F(FileMatchTrieTest, CannotResolveRelativePath) {
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add a subpath to avoid the `if (Children.empty())` special case,
+  // which we hit in the previous `find()` call.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 2768441829cec..144015deedbc5 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -2015,10 +2015,10 @@ PragmaClangAttributeSupport::generateStrictConformsTo(const Record &Attr,
     return;
   // Generate a function that constructs a set of matching rules that describe
   // to which declarations the attribute should apply to.
-  OS << "virtual void getPragmaAttributeMatchRules("
+  OS << "void getPragmaAttributeMatchRules("
      << "llvm::SmallVectorImpl> &MatchRules, const LangOptions &LangOpts) const {\n";
+     << ", bool>> &MatchRules, const LangOptions &LangOpts) const override {\n";
   const Record *SubjectObj = Attr.getValueAsDef("Subjects");
   std::vector Subjects = SubjectObj->getValueAsListOfDefs("Subjects");
   for (const auto *Subject : Subjects) {
@@ -3529,8 +3529,8 @@ static void GenerateAppertainsTo(const Record &Attr, raw_ostream &OS) {
   // at all (for instance because it was applied to a type), or that the caller
   // has determined that the check should fail (perhaps prior to the creation
   // of the declaration).
-  OS << "virtual bool diagAppertainsToDecl(Sema &S, ";
-  OS << "const ParsedAttr &Attr, const Decl *D) const {\n";
+  OS << "bool diagAppertainsToDecl(Sema &S, ";
+  OS << "const ParsedAttr &Attr, const Decl *D) const override {\n";
   OS << "  if (";
   for (auto I = Subjects.begin(), E = Subjects.end(); I != E; ++I) {
     // If the subject has custom code associated with it, use the generated
@@ -3604,8 +3604,8 @@ static void GenerateLangOptRequirements(const Record &R,
   if (LangOpts.empty())
     return;
 
-  OS << "virtual bool diagLangOpts(Sema &S, const ParsedAttr &Attr) ";
-  OS << "const {\n";
+  OS << "bool diagLangOpts(Sema &S, const ParsedAttr &Attr) ";
+  OS << "const override {\n";
   OS << "  auto &LangOpts = S.LangOpts;\n";
   OS << "  if (" << GenerateTestExpression(LangOpts) << ")\n";
   OS << "    return true;\n\n";
@@ -3649,7 +3649,7 @@ static void GenerateTargetRequirements(const Record &Attr,
   std::string Test;
   bool UsesT = GenerateTargetSpecificAttrChecks(R, Arches, Test, &FnName);
 
-  OS << "virtual bool existsInTarget(const TargetInfo &Target) const {\n";
+  OS << "bool existsInTarget(const TargetInfo &Target) const override {\n";
   if (UsesT)
     OS << "  const llvm::Triple &T = Target.getTriple(); (void)T;\n";
   OS << "  return " << Test << ";\n";
@@ -3674,8 +3674,8 @@ static void GenerateSpellingIndexToSemanticSpelling(const Record &Attr,
   std::string Enum = CreateSemanticSpellings(Spellings, SemanticToSyntacticMap);
   std::string Name = Attr.getName().str() + "AttrSpellingMap";
 
-  OS << "virtual unsigned spellingIndexToSemanticSpelling(";
-  OS << "const ParsedAttr &Attr) const {\n";
+  OS << "unsigned spellingIndexToSemanticSpelling(";
+  OS << "const ParsedAttr &Attr) const override {\n";
   OS << Enum;
   OS << "  unsigned Idx = Attr.getAttributeSpellingListIndex();\n";
   WriteSemanticSpellingSwitch("Idx", SemanticToSyntacticMap, OS);
@@ -3688,8 +3688,8 @@ static void GenerateHandleDeclAttribute(const Record &Attr, raw_ostream &OS) {
     return;
 
   // Generate a function which just converts from ParsedAttr to the Attr type.
-  OS << "virtual AttrHandling handleDeclAttribute(Sema &S, Decl *D,";
-  OS << "const ParsedAttr &Attr) const {\n";
+  OS << "AttrHandling handleDeclAttribute(Sema &S, Decl *D,";
+  OS << "const ParsedAttr &Attr) const override {\n";
   OS << "  D->addAttr(::new (S.Context) " << Attr.getName();
   OS << "Attr(S.Context, Attr));\n";
   OS << "  return AttributeApplied;\n";
diff --git a/clang/utils/analyzer/Dockerfile b/clang/utils/analyzer/Dockerfile
index 21906011c7dc2..f74ff8aa95c25 100644
--- a/clang/utils/analyzer/Dockerfile
+++ b/clang/utils/analyzer/Dockerfile
@@ -42,6 +42,16 @@ RUN apt-get install -y \
     libjsonrpccpp-dev=0.7.0-1build2 \
     uuid-dev=2.31.1-0.4ubuntu3.6
 
+# tmux dependencies
+RUN apt-get install -y \
+    autotools-dev=20180224.1 \
+    automake=1:1.15.1-3ubuntu2 \
+    libncurses5-dev=6.1-1ubuntu1.18.04 \
+    libevent-dev=2.1.8-stable-4build1 \
+    pkg-config=0.29.1-0ubuntu2 \
+    flex=2.6.4-6 \
+    bison=2:3.0.4.dfsg-1build1
+
 RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
 
 VOLUME /analyzer
diff --git a/clang/utils/analyzer/SATest.py b/clang/utils/analyzer/SATest.py
index 16f1dce0c584e..46e636ad2895e 100755
--- a/clang/utils/analyzer/SATest.py
+++ b/clang/utils/analyzer/SATest.py
@@ -34,29 +34,10 @@ def add(parser, args):
 
 def build(parser, args):
     import SATestBuild
-    from ProjectMap import ProjectMap
 
     SATestBuild.VERBOSE = args.verbose
 
-    project_map = ProjectMap()
-    projects = project_map.projects
-
-    if args.projects:
-        projects_arg = args.projects.split(",")
-        available_projects = [project.name
-                              for project in projects]
-
-        # validate that given projects are present in the project map file
-        for manual_project in projects_arg:
-            if manual_project not in available_projects:
-                parser.error("Project '{project}' is not found in "
-                             "the project map file. Available projects are "
-                             "{all}.".format(project=manual_project,
-                                             all=available_projects))
-
-        projects = [project.with_fields(enabled=project.name in projects_arg)
-                    for project in projects]
-
+    projects = get_projects(parser, args.projects)
     tester = SATestBuild.RegressionTester(args.jobs,
                                           projects,
                                           args.override_compiler,
@@ -100,6 +81,44 @@ def update(parser, args):
         SATestUpdateDiffs.update_reference_results(project)
 
 
+def benchmark(parser, args):
+    from SATestBenchmark import Benchmark
+
+    projects = get_projects(parser, args.projects)
+    benchmark = Benchmark(projects, args.iterations, args.output)
+    benchmark.run()
+
+
+def benchmark_compare(parser, args):
+    import SATestBenchmark
+    SATestBenchmark.compare(args.old, args.new, args.output)
+
+
+def get_projects(parser, projects_str):
+    from ProjectMap import ProjectMap
+
+    project_map = ProjectMap()
+    projects = project_map.projects
+
+    if projects_str:
+        projects_arg = projects_str.split(",")
+        available_projects = [project.name
+                              for project in projects]
+
+        # validate that given projects are present in the project map file
+        for manual_project in projects_arg:
+            if manual_project not in available_projects:
+                parser.error("Project '{project}' is not found in "
+                             "the project map file. Available projects are "
+                             "{all}.".format(project=manual_project,
+                                             all=available_projects))
+
+        projects = [project.with_fields(enabled=project.name in projects_arg)
+                    for project in projects]
+
+    return projects
+
+
 def docker(parser, args):
     if len(args.rest) > 0:
         if args.rest[0] != "--":
@@ -284,6 +303,36 @@ def main():
                              "to the docker's entrypoint.")
     dock_parser.set_defaults(func=docker)
 
+    # benchmark subcommand
+    bench_parser = subparsers.add_parser(
+        "benchmark",
+        help="Run benchmarks by building a set of projects multiple times.")
+
+    bench_parser.add_argument("-i", "--iterations", action="store",
+                              type=int, default=20,
+                              help="Number of iterations for building each "
+                              "project.")
+    bench_parser.add_argument("-o", "--output", action="store",
+                              default="benchmark.csv",
+                              help="Output csv file for the benchmark results")
+    bench_parser.add_argument("--projects", action="store", default="",
+                              help="Comma-separated list of projects to test")
+    bench_parser.set_defaults(func=benchmark)
+
+    bench_subparsers = bench_parser.add_subparsers()
+    bench_compare_parser = bench_subparsers.add_parser(
+        "compare",
+        help="Compare benchmark runs.")
+    bench_compare_parser.add_argument("--old", action="store", required=True,
+                                      help="Benchmark reference results to "
+                                      "compare against.")
+    bench_compare_parser.add_argument("--new", action="store", required=True,
+                                      help="New benchmark results to check.")
+    bench_compare_parser.add_argument("-o", "--output",
+                                      action="store", required=True,
+                                      help="Output file for plots.")
+    bench_compare_parser.set_defaults(func=benchmark_compare)
+
     args = parser.parse_args()
     args.func(parser, args)
 
diff --git a/clang/utils/analyzer/SATestBenchmark.py b/clang/utils/analyzer/SATestBenchmark.py
new file mode 100644
index 0000000000000..0fa2204bbbe7e
--- /dev/null
+++ b/clang/utils/analyzer/SATestBenchmark.py
@@ -0,0 +1,158 @@
+"""
+Static Analyzer qualification infrastructure.
+
+This source file contains all the functionality related to benchmarking
+the analyzer on a set of projects.  Right now, this includes measuring
+execution time and peak memory usage.  Benchmark runs analysis on every
+project multiple times to get a better picture about the distribution
+of measured values.
+
+Additionally, this file includes a comparison routine for two benchmarking
+results that plots the result together on one chart.
+"""
+
+import SATestUtils as utils
+from SATestBuild import ProjectTester, stdout, TestInfo
+from ProjectMap import ProjectInfo
+
+import pandas as pd
+from typing import List, Tuple
+
+
+INDEX_COLUMN = "index"
+
+
+def _save(data: pd.DataFrame, file_path: str):
+    data.to_csv(file_path, index_label=INDEX_COLUMN)
+
+
+def _load(file_path: str) -> pd.DataFrame:
+    return pd.read_csv(file_path, index_col=INDEX_COLUMN)
+
+
+class Benchmark:
+    """
+    Benchmark class encapsulates one functionality: it runs the analysis
+    multiple times for the given set of projects and stores results in the
+    specified file.
+    """
+    def __init__(self, projects: List[ProjectInfo], iterations: int,
+                 output_path: str):
+        self.projects = projects
+        self.iterations = iterations
+        self.out = output_path
+
+    def run(self):
+        results = [self._benchmark_project(project)
+                   for project in self.projects]
+
+        data = pd.concat(results, ignore_index=True)
+        _save(data, self.out)
+
+    def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
+        if not project.enabled:
+            stdout(f" \n\n--- Skipping disabled project {project.name}\n")
+            return
+
+        stdout(f" \n\n--- Benchmarking project {project.name}\n")
+
+        test_info = TestInfo(project)
+        tester = ProjectTester(test_info, silent=True)
+        project_dir = tester.get_project_dir()
+        output_dir = tester.get_output_dir()
+
+        raw_data = []
+
+        for i in range(self.iterations):
+            stdout(f"Iteration #{i + 1}")
+            time, mem = tester.build(project_dir, output_dir)
+            raw_data.append({"time": time, "memory": mem,
+                             "iteration": i, "project": project.name})
+            stdout(f"time: {utils.time_to_str(time)}, "
+                   f"peak memory: {utils.memory_to_str(mem)}")
+
+        return pd.DataFrame(raw_data)
+
+
+def compare(old_path: str, new_path: str, plot_file: str):
+    """
+    Compare two benchmarking results stored as .csv files
+    and produce a plot in the specified file.
+    """
+    old = _load(old_path)
+    new = _load(new_path)
+
+    old_projects = set(old["project"])
+    new_projects = set(new["project"])
+    common_projects = old_projects & new_projects
+
+    # Leave only rows for projects common to both dataframes.
+    old = old[old["project"].isin(common_projects)]
+    new = new[new["project"].isin(common_projects)]
+
+    old, new = _normalize(old, new)
+
+    # Seaborn prefers all the data to be in one dataframe.
+    old["kind"] = "old"
+    new["kind"] = "new"
+    data = pd.concat([old, new], ignore_index=True)
+
+    # TODO: compare data in old and new dataframes using statistical tests
+    #       to check if they belong to the same distribution
+    _plot(data, plot_file)
+
+
+def _normalize(old: pd.DataFrame,
+               new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    # This creates a dataframe with all numerical data averaged.
+    means = old.groupby("project").mean()
+    return _normalize_impl(old, means), _normalize_impl(new, means)
+
+
+def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
+    # Right now 'means' has one row corresponding to one project,
+    # while 'data' has N rows for each project (one for each iteration).
+    #
+    # In order for us to work easier with this data, we duplicate
+    # 'means' data to match the size of the 'data' dataframe.
+    #
+    # All the columns from 'data' will maintain their names, while
+    # new columns coming from 'means' will have "_mean" suffix.
+    joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
+    _normalize_key(joined_data, "time")
+    _normalize_key(joined_data, "memory")
+    return joined_data
+
+
+def _normalize_key(data: pd.DataFrame, key: str):
+    norm_key = _normalized_name(key)
+    mean_key = f"{key}_mean"
+    data[norm_key] = data[key] / data[mean_key]
+
+
+def _normalized_name(name: str) -> str:
+    return f"normalized {name}"
+
+
+def _plot(data: pd.DataFrame, plot_file: str):
+    import matplotlib
+    import seaborn as sns
+    from matplotlib import pyplot as plt
+
+    sns.set_style("whitegrid")
+    # We want to have time and memory charts one above the other.
+    figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
+
+    def _subplot(key: str, ax: matplotlib.axes.Axes):
+        sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
+                    data=data, palette=sns.color_palette("BrBG", 2), ax=ax)
+
+    _subplot("time", ax1)
+    # No need to have xlabels on both top and bottom charts.
+    ax1.set_xlabel("")
+
+    _subplot("memory", ax2)
+    # The legend on the top chart is enough.
+    ax2.get_legend().remove()
+
+    figure.savefig(plot_file)
diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index eefab869f6ef9..ed5c7379bb5b4 100644
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -87,10 +87,18 @@ def fileno(self) -> int:
         return 0
 
 
-Logger = logging.getLogger("main")
 LOCAL = threading.local()
-LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+
+
+def init_logger(name: str):
+    # TODO: use debug levels for VERBOSE messages
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)
+    LOCAL.stdout = StreamToLogger(logger, logging.INFO)
+    LOCAL.stderr = StreamToLogger(logger, logging.ERROR)
+
+
+init_logger("main")
 
 
 def stderr(message: str):
@@ -102,7 +110,6 @@ def stdout(message: str):
 
 
 logging.basicConfig(
-    level=logging.DEBUG,
     format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
 
 
@@ -298,12 +305,13 @@ class ProjectTester:
     """
     A component aggregating testing for one project.
     """
-    def __init__(self, test_info: TestInfo):
+    def __init__(self, test_info: TestInfo, silent: bool = False):
         self.project = test_info.project
         self.override_compiler = test_info.override_compiler
         self.extra_analyzer_config = test_info.extra_analyzer_config
         self.is_reference_build = test_info.is_reference_build
         self.strictness = test_info.strictness
+        self.silent = silent
 
     def test(self) -> bool:
         """
@@ -312,20 +320,19 @@ def test(self) -> bool:
         to the :param strictness: criteria.
         """
         if not self.project.enabled:
-            stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+            self.out(
+                f" \n\n--- Skipping disabled project {self.project.name}\n")
             return True
 
-        stdout(f" \n\n--- Building project {self.project.name}\n")
+        self.out(f" \n\n--- Building project {self.project.name}\n")
 
         start_time = time.time()
 
         project_dir = self.get_project_dir()
-        if VERBOSE >= 1:
-            stdout(f"  Build directory: {project_dir}.\n")
+        self.vout(f"  Build directory: {project_dir}.\n")
 
         # Set the build results directory.
         output_dir = self.get_output_dir()
-        output_dir = os.path.join(project_dir, output_dir)
 
         self.build(project_dir, output_dir)
         check_build(output_dir)
@@ -336,8 +343,8 @@ def test(self) -> bool:
         else:
             passed = run_cmp_results(project_dir, self.strictness)
 
-        stdout(f"Completed tests for project {self.project.name} "
-               f"(time: {time.time() - start_time:.2f}).\n")
+        self.out(f"Completed tests for project {self.project.name} "
+                 f"(time: {time.time() - start_time:.2f}).\n")
 
         return passed
 
@@ -346,22 +353,23 @@ def get_project_dir(self) -> str:
 
     def get_output_dir(self) -> str:
         if self.is_reference_build:
-            return REF_PREFIX + OUTPUT_DIR_NAME
+            dirname = REF_PREFIX + OUTPUT_DIR_NAME
         else:
-            return OUTPUT_DIR_NAME
+            dirname = OUTPUT_DIR_NAME
+
+        return os.path.join(self.get_project_dir(), dirname)
 
-    def build(self, directory: str, output_dir: str):
+    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
         build_log_path = get_build_log_path(output_dir)
 
-        stdout(f"Log file: {build_log_path}\n")
-        stdout(f"Output directory: {output_dir}\n")
+        self.out(f"Log file: {build_log_path}\n")
+        self.out(f"Output directory: {output_dir}\n")
 
         remove_log_file(output_dir)
 
         # Clean up scan build results.
         if os.path.exists(output_dir):
-            if VERBOSE >= 1:
-                stdout(f"  Removing old results: {output_dir}\n")
+            self.vout(f"  Removing old results: {output_dir}\n")
 
             shutil.rmtree(output_dir)
 
@@ -374,7 +382,7 @@ def build(self, directory: str, output_dir: str):
                 self._download_and_patch(directory, build_log_file)
                 run_cleanup_script(directory, build_log_file)
                 build_time, memory = self.scan_build(directory, output_dir,
-                                               build_log_file)
+                                                     build_log_file)
             else:
                 build_time, memory = self.analyze_preprocessed(directory,
                                                                output_dir)
@@ -384,9 +392,11 @@ def build(self, directory: str, output_dir: str):
                 normalize_reference_results(directory, output_dir,
                                             self.project.mode)
 
-        stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
-               f"peak memory: {utils.memory_to_str(memory)}). "
-               f"See the log for more details: {build_log_path}\n")
+        self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
+                 f"peak memory: {utils.memory_to_str(memory)}). "
+                 f"See the log for more details: {build_log_path}\n")
+
+        return build_time, memory
 
     def scan_build(self, directory: str, output_dir: str,
                    build_log_file: IO) -> Tuple[float, int]:
@@ -454,8 +464,7 @@ def scan_build(self, directory: str, output_dir: str,
 
                 command_to_run = command_prefix + command
 
-                if VERBOSE >= 1:
-                    stdout(f"  Executing: {command_to_run}\n")
+                self.vout(f"  Executing: {command_to_run}\n")
 
                 time, mem = utils.check_and_measure_call(
                     command_to_run, cwd=cwd,
@@ -522,8 +531,7 @@ def analyze_preprocessed(self, directory: str,
             log_path = os.path.join(fail_path, file_name + ".stderr.txt")
             with open(log_path, "w+") as log_file:
                 try:
-                    if VERBOSE >= 1:
-                        stdout(f"  Executing: {command}\n")
+                    self.vout(f"  Executing: {command}\n")
 
                     time, mem = utils.check_and_measure_call(
                         command, cwd=directory, stderr=log_file,
@@ -592,8 +600,10 @@ def _download(self, directory: str, build_log_file: IO):
                 f"for the '{self.project.name}' project")
 
     def _download_from_git(self, directory: str, build_log_file: IO):
+        repo = self.project.origin
         cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
-        check_call(f"git clone --recursive {self.project.origin} {cached_source}",
+
+        check_call(f"git clone --recursive {repo} {cached_source}",
                    cwd=directory, stderr=build_log_file,
                    stdout=build_log_file, shell=True)
         check_call(f"git checkout --quiet {self.project.commit}",
@@ -624,16 +634,15 @@ def _run_download_script(directory: str, build_log_file: IO):
                          out=LOCAL.stdout, err=LOCAL.stderr,
                          verbose=VERBOSE)
 
-    @staticmethod
-    def _apply_patch(directory: str, build_log_file: IO):
+    def _apply_patch(self, directory: str, build_log_file: IO):
         patchfile_path = os.path.join(directory, PATCHFILE_NAME)
         patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
 
         if not os.path.exists(patchfile_path):
-            stdout("  No local patches.\n")
+            self.out("  No local patches.\n")
             return
 
-        stdout("  Applying patch.\n")
+        self.out("  Applying patch.\n")
         try:
             check_call(f"patch -p1 < '{patchfile_path}'",
                        cwd=patched_source,
@@ -646,6 +655,14 @@ def _apply_patch(directory: str, build_log_file: IO):
                    f"See {build_log_file.name} for details.\n")
             sys.exit(1)
 
+    def out(self, what: str):
+        if not self.silent:
+            stdout(what)
+
+    def vout(self, what: str):
+        if VERBOSE >= 1:
+            self.out(what)
+
 
 class TestProjectThread(threading.Thread):
     def __init__(self, tasks_queue: TestQueue,
@@ -668,10 +685,7 @@ def run(self):
         while not self.tasks_queue.empty():
             try:
                 test_info = self.tasks_queue.get()
-
-                Logger = logging.getLogger(test_info.project.name)
-                LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-                LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+                init_logger(test_info.project.name)
 
                 tester = ProjectTester(test_info)
                 if not tester.test():
diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py
index 1a2c41d2debf2..920fa15e4c6f5 100644
--- a/clang/utils/analyzer/SATestUpdateDiffs.py
+++ b/clang/utils/analyzer/SATestUpdateDiffs.py
@@ -21,10 +21,10 @@ def update_reference_results(project: ProjectInfo):
     project_dir = tester.get_project_dir()
 
     tester.is_reference_build = True
-    ref_results_path = os.path.join(project_dir, tester.get_output_dir())
+    ref_results_path = tester.get_output_dir()
 
     tester.is_reference_build = False
-    created_results_path = os.path.join(project_dir, tester.get_output_dir())
+    created_results_path = tester.get_output_dir()
 
     if not os.path.exists(created_results_path):
         print("New results not found, was SATestBuild.py previously run?",
diff --git a/clang/utils/analyzer/entrypoint.py b/clang/utils/analyzer/entrypoint.py
index b440e776b57cb..9c84431da5482 100644
--- a/clang/utils/analyzer/entrypoint.py
+++ b/clang/utils/analyzer/entrypoint.py
@@ -50,7 +50,7 @@ def is_cmake_needed():
 
 CMAKE_COMMAND = "cmake -G Ninja -DCMAKE_BUILD_TYPE=Release " \
     "-DCMAKE_INSTALL_PREFIX=/analyzer -DLLVM_TARGETS_TO_BUILD=X86 " \
-    "-DLLVM_ENABLE_PROJECTS=clang -DLLVM_BUILD_RUNTIME=OFF " \
+    "-DLLVM_ENABLE_PROJECTS=\"clang;openmp\" -DLLVM_BUILD_RUNTIME=OFF " \
     "-DLLVM_ENABLE_TERMINFO=OFF -DCLANG_ENABLE_ARCMT=OFF " \
     "-DCLANG_ENABLE_STATIC_ANALYZER=ON"
 
diff --git a/clang/utils/analyzer/projects/capnproto/cleanup_run_static_analyzer.sh b/clang/utils/analyzer/projects/capnproto/cleanup_run_static_analyzer.sh
new file mode 100755
index 0000000000000..e14c423280ec5
--- /dev/null
+++ b/clang/utils/analyzer/projects/capnproto/cleanup_run_static_analyzer.sh
@@ -0,0 +1 @@
+rm -rf ./build
diff --git a/clang/utils/analyzer/projects/capnproto/run_static_analyzer.cmd b/clang/utils/analyzer/projects/capnproto/run_static_analyzer.cmd
new file mode 100644
index 0000000000000..6678fe635db32
--- /dev/null
+++ b/clang/utils/analyzer/projects/capnproto/run_static_analyzer.cmd
@@ -0,0 +1,2 @@
+cmake . -DCMAKE_BUILD_TYPE=Debug -Bbuild -GNinja
+cmake --build build
diff --git a/clang/utils/analyzer/projects/cppcheck/cleanup_run_static_analyzer.sh b/clang/utils/analyzer/projects/cppcheck/cleanup_run_static_analyzer.sh
new file mode 100755
index 0000000000000..e14c423280ec5
--- /dev/null
+++ b/clang/utils/analyzer/projects/cppcheck/cleanup_run_static_analyzer.sh
@@ -0,0 +1 @@
+rm -rf ./build
diff --git a/clang/utils/analyzer/projects/cppcheck/run_static_analyzer.cmd b/clang/utils/analyzer/projects/cppcheck/run_static_analyzer.cmd
new file mode 100644
index 0000000000000..72cb7f7677e6d
--- /dev/null
+++ b/clang/utils/analyzer/projects/cppcheck/run_static_analyzer.cmd
@@ -0,0 +1,2 @@
+cmake . -DCMAKE_BUILD_TYPE=Debug -DCMAKE_DISABLE_PRECOMPILE_HEADERS=ON -Bbuild -GNinja
+cmake --build build
diff --git a/clang/utils/analyzer/projects/faiss/cleanup_run_static_analyzer.sh b/clang/utils/analyzer/projects/faiss/cleanup_run_static_analyzer.sh
new file mode 100755
index 0000000000000..efcd16e590a18
--- /dev/null
+++ b/clang/utils/analyzer/projects/faiss/cleanup_run_static_analyzer.sh
@@ -0,0 +1 @@
+make clean
diff --git a/clang/utils/analyzer/projects/faiss/run_static_analyzer.cmd b/clang/utils/analyzer/projects/faiss/run_static_analyzer.cmd
new file mode 100644
index 0000000000000..877fa2aa389b7
--- /dev/null
+++ b/clang/utils/analyzer/projects/faiss/run_static_analyzer.cmd
@@ -0,0 +1,2 @@
+./configure --without-cuda
+make
diff --git a/clang/utils/analyzer/projects/harfbuzz/cleanup_run_static_analyzer.sh b/clang/utils/analyzer/projects/harfbuzz/cleanup_run_static_analyzer.sh
new file mode 100755
index 0000000000000..e14c423280ec5
--- /dev/null
+++ b/clang/utils/analyzer/projects/harfbuzz/cleanup_run_static_analyzer.sh
@@ -0,0 +1 @@
+rm -rf ./build
diff --git a/clang/utils/analyzer/projects/harfbuzz/run_static_analyzer.cmd b/clang/utils/analyzer/projects/harfbuzz/run_static_analyzer.cmd
new file mode 100644
index 0000000000000..6678fe635db32
--- /dev/null
+++ b/clang/utils/analyzer/projects/harfbuzz/run_static_analyzer.cmd
@@ -0,0 +1,2 @@
+cmake . -DCMAKE_BUILD_TYPE=Debug -Bbuild -GNinja
+cmake --build build
diff --git a/clang/utils/analyzer/projects/projects.json b/clang/utils/analyzer/projects/projects.json
index e3d853ac6f6a5..84b741035f46c 100644
--- a/clang/utils/analyzer/projects/projects.json
+++ b/clang/utils/analyzer/projects/projects.json
@@ -103,5 +103,40 @@
     "source": "git",
     "origin": "https://github.com/google/re2.git",
     "commit": "2b25567"
+  },
+  {
+    "name": "cppcheck",
+    "mode": 1,
+    "source": "git",
+    "origin": "https://github.com/danmar/cppcheck.git",
+    "commit": "5fa3d53"
+  },
+  {
+    "name": "harfbuzz",
+    "mode": 1,
+    "source": "git",
+    "origin": "https://github.com/harfbuzz/harfbuzz.git",
+    "commit": "f8d345e"
+  },
+  {
+    "name": "capnproto",
+    "mode": 1,
+    "source": "git",
+    "origin": "https://github.com/capnproto/capnproto.git",
+    "commit": "8be1c9f"
+  },
+  {
+    "name": "tmux",
+    "mode": 1,
+    "source": "git",
+    "origin": "https://github.com/tmux/tmux.git",
+    "commit": "a5f99e1"
+  },
+  {
+    "name": "faiss",
+    "mode": 1,
+    "source": "git",
+    "origin": "https://github.com/facebookresearch/faiss.git",
+    "commit": "9e5d5b7"
   }
 ]
diff --git a/clang/utils/analyzer/projects/tmux/cleanup_run_static_analyzer.sh b/clang/utils/analyzer/projects/tmux/cleanup_run_static_analyzer.sh
new file mode 100755
index 0000000000000..f671df1de5f4e
--- /dev/null
+++ b/clang/utils/analyzer/projects/tmux/cleanup_run_static_analyzer.sh
@@ -0,0 +1,2 @@
+make distclean
+exit 0
diff --git a/clang/utils/analyzer/projects/tmux/run_static_analyzer.cmd b/clang/utils/analyzer/projects/tmux/run_static_analyzer.cmd
new file mode 100644
index 0000000000000..78ce31f452a12
--- /dev/null
+++ b/clang/utils/analyzer/projects/tmux/run_static_analyzer.cmd
@@ -0,0 +1,2 @@
+./autogen.sh
+./configure && make
diff --git a/clang/utils/analyzer/requirements.txt b/clang/utils/analyzer/requirements.txt
index ec4f669299523..8ae8bc88ac191 100644
--- a/clang/utils/analyzer/requirements.txt
+++ b/clang/utils/analyzer/requirements.txt
@@ -1,4 +1,6 @@
 graphviz
 humanize
 matplotlib
+pandas
 psutil
+seaborn
diff --git a/clang/utils/clangdiag.py b/clang/utils/clangdiag.py
index 6baf65a8761c3..4de8c570df7f5 100755
--- a/clang/utils/clangdiag.py
+++ b/clang/utils/clangdiag.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 #----------------------------------------------------------------------
 # Be sure to add the python path that points to the LLDB shared library.
diff --git a/clang/utils/modfuzz.py b/clang/utils/modfuzz.py
index 61ca3272aca56..84707f471ada1 100644
--- a/clang/utils/modfuzz.py
+++ b/clang/utils/modfuzz.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#!/usr/bin/env python
 
 # To use:
 #  1) Update the 'decls' list below with your fuzzing configuration.
diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake
index 425de8bffdf72..be8d7e733c7a0 100644
--- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake
@@ -344,6 +344,38 @@ function(darwin_lipo_libs name)
   endif()
 endfunction()
 
+# Filter the list of builtin sources for Darwin, then delegate to the generic
+# filtering.
+#
+# `exclude_or_include` must be one of:
+#  - EXCLUDE: remove every item whose name (w/o extension) matches a name in
+#    `excluded_list`.
+#  - INCLUDE: keep only items whose name (w/o extension) matches something
+#    in `excluded_list`.
+function(darwin_filter_builtin_sources output_var name exclude_or_include excluded_list)
+  if(exclude_or_include STREQUAL "EXCLUDE")
+    set(filter_action GREATER)
+    set(filter_value -1)
+  elseif(exclude_or_include STREQUAL "INCLUDE")
+    set(filter_action LESS)
+    set(filter_value 0)
+  else()
+    message(FATAL_ERROR "darwin_filter_builtin_sources called without EXCLUDE|INCLUDE")
+  endif()
+
+  set(intermediate ${ARGN})
+  foreach(_file ${intermediate})
+    get_filename_component(_name_we ${_file} NAME_WE)
+    list(FIND ${excluded_list} ${_name_we} _found)
+    if(_found ${filter_action} ${filter_value})
+      list(REMOVE_ITEM intermediate ${_file})
+    endif()
+  endforeach()
+
+  filter_builtin_sources(intermediate ${name})
+  set(${output_var} ${intermediate} PARENT_SCOPE)
+endfunction()
+
 # Generates builtin libraries for all operating systems specified in ARGN. Each
 # OS library is constructed by lipo-ing together single-architecture libraries.
 macro(darwin_add_builtin_libraries)
@@ -366,7 +398,8 @@ macro(darwin_add_builtin_libraries)
                               ARCH ${arch}
                               MIN_VERSION ${DARWIN_${os}_BUILTIN_MIN_VER})
 
-      filter_builtin_sources(filtered_sources
+      darwin_filter_builtin_sources(filtered_sources
+        ${os}_${arch}
         EXCLUDE ${arch}_${os}_EXCLUDED_BUILTINS
         ${${arch}_SOURCES})
 
@@ -388,7 +421,8 @@ macro(darwin_add_builtin_libraries)
                               OS ${os}
                               ARCH ${arch})
 
-        filter_builtin_sources(filtered_sources
+        darwin_filter_builtin_sources(filtered_sources
+          cc_kext_${os}_${arch}
           EXCLUDE ${arch}_${os}_EXCLUDED_BUILTINS
           ${${arch}_SOURCES})
 
@@ -484,7 +518,8 @@ macro(darwin_add_embedded_builtin_libraries)
     set(x86_64_FUNCTIONS ${common_FUNCTIONS})
 
     foreach(arch ${DARWIN_macho_embedded_ARCHS})
-      filter_builtin_sources(${arch}_filtered_sources
+      darwin_filter_builtin_sources(${arch}_filtered_sources
+        macho_embedded_${arch}
         INCLUDE ${arch}_FUNCTIONS
         ${${arch}_SOURCES})
       if(NOT ${arch}_filtered_sources)
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index d3607edd58822..99b9f0e4af44d 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -332,33 +332,30 @@ macro(construct_compiler_rt_default_triple)
   endif()
 endmacro()
 
-# Filter out generic versions of routines that are re-implemented in
-# architecture specific manner.  This prevents multiple definitions of the
-# same symbols, making the symbol selection non-deterministic.
-function(filter_builtin_sources output_var exclude_or_include excluded_list)
-  if(exclude_or_include STREQUAL "EXCLUDE")
-    set(filter_action GREATER)
-    set(filter_value -1)
-  elseif(exclude_or_include STREQUAL "INCLUDE")
-    set(filter_action LESS)
-    set(filter_value 0)
-  else()
-    message(FATAL_ERROR "filter_builtin_sources called without EXCLUDE|INCLUDE")
-  endif()
-
-  set(intermediate ${ARGN})
-  foreach (_file ${intermediate})
-    get_filename_component(_name_we ${_file} NAME_WE)
-    list(FIND ${excluded_list} ${_name_we} _found)
-    if(_found ${filter_action} ${filter_value})
-      list(REMOVE_ITEM intermediate ${_file})
-    elseif(${_file} MATCHES ".*/.*\\.S" OR ${_file} MATCHES ".*/.*\\.c")
+# Filter out generic versions of routines that are re-implemented in an
+# architecture specific manner. This prevents multiple definitions of the same
+# symbols, making the symbol selection non-deterministic.
+#
+# We follow the convention that a source file that exists in a sub-directory
+# (e.g. `ppc/divtc3.c`) is architecture-specific and that if a generic
+# implementation exists it will be a top-level source file with the same name
+# modulo the file extension (e.g. `divtc3.c`).
+function(filter_builtin_sources inout_var name)
+  set(intermediate ${${inout_var}})
+  foreach(_file ${intermediate})
+    get_filename_component(_file_dir ${_file} DIRECTORY)
+    if (NOT "${_file_dir}" STREQUAL "")
+      # Architecture specific file. If a generic version exists, print a notice
+      # and ensure that it is removed from the file list.
       get_filename_component(_name ${_file} NAME)
-      string(REPLACE ".S" ".c" _cname "${_name}")
-      list(REMOVE_ITEM intermediate ${_cname})
-    endif ()
-  endforeach ()
-  set(${output_var} ${intermediate} PARENT_SCOPE)
+      string(REGEX REPLACE "\\.S$" ".c" _cname "${_name}")
+      if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_cname}")
+        message(STATUS "For ${name} builtins preferring ${_file} to ${_cname}")
+        list(REMOVE_ITEM intermediate ${_cname})
+      endif()
+    endif()
+  endforeach()
+  set(${inout_var} ${intermediate} PARENT_SCOPE)
 endfunction()
 
 function(get_compiler_rt_target arch variable)
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index a6913527e67f0..6d0ffb12294b0 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -157,6 +157,8 @@ VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA
 #ifndef VALUE_RANGE_PROF
 VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
 #else /* VALUE_RANGE_PROF */
+/* FIXME: This is to be removed after switching to the new memop value
+ * profiling. */
 VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \
                       INSTR_PROF_COMMA
 VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \
@@ -753,9 +755,14 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target
 #define INSTR_PROF_VALUE_PROF_FUNC_STR \
         INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC)
+/* FIXME: This is to be removed after switching to the new memop value
+ * profiling. */
 #define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range
 #define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \
         INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC)
+#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC __llvm_profile_instrument_memop
+#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR                                   \
+  INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_MEMOP_FUNC)
 
 /* InstrProfile per-function control data alignment.  */
 #define INSTR_PROF_DATA_ALIGNMENT 8
@@ -783,3 +790,121 @@ typedef struct InstrProfValueData {
 #endif
 
 #undef COVMAP_V2_OR_V3
+
+#ifdef INSTR_PROF_VALUE_PROF_MEMOP_API
+
+#ifdef __cplusplus
+#define INSTR_PROF_INLINE inline
+#else
+#define INSTR_PROF_INLINE
+#endif
+
+/* The value range buckets (22 buckets) for the memop size value profiling look
+ * like:
+ *
+ *   [0, 0]
+ *   [1, 1]
+ *   [2, 2]
+ *   [3, 3]
+ *   [4, 4]
+ *   [5, 5]
+ *   [6, 6]
+ *   [7, 7]
+ *   [8, 8]
+ *   [9, 15]
+ *   [16, 16]
+ *   [17, 31]
+ *   [32, 32]
+ *   [33, 63]
+ *   [64, 64]
+ *   [65, 127]
+ *   [128, 128]
+ *   [129, 255]
+ *   [256, 256]
+ *   [257, 511]
+ *   [512, 512]
+ *   [513, UINT64_MAX]
+ *
+ * Each range has a 'representative value' which is the lower end value of the
+ * range and used to store in the runtime profile data records and the VP
+ * metadata. For example, it's 2 for [2, 2] and 65 for [65, 127].
+ */
+
+/*
+ * Clz and Popcount. This code was copied from
+ * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and
+ * llvm/include/llvm/Support/MathExtras.h.
+ */
+#if defined(_MSC_VER) && !defined(__clang__)
+
+#include 
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfClzll(unsigned long long X) {
+  unsigned long LeadZeroIdx = 0;
+#if !defined(_M_ARM64) && !defined(_M_X64)
+  // Scan the high 32 bits.
+  if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32)))
+    return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset
+                                                      // from the MSB.
+  // Scan the low 32 bits.
+  if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X)))
+    return (int)(63 - LeadZeroIdx);
+#else
+  if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx;
+#endif
+  return 64;
+}
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfPopcountll(unsigned long long X) {
+  // This code originates from https://reviews.llvm.org/rG30626254510f.
+  unsigned long long v = X;
+  v = v - ((v >> 1) & 0x5555555555555555ULL);
+  v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
+  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
+  return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56);
+}
+
+#else
+
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfClzll(unsigned long long X) { return __builtin_clzll(X); }
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
+int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); }
+
+#endif  /* defined(_MSC_VER) && !defined(__clang__) */
+
+/* Map an (observed) memop size value to the representative value of its range.
+ * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t
+InstrProfGetRangeRepValue(uint64_t Value) {
+  if (Value <= 8)
+    // The first ranges are individually tracked. Use the value as is.
+    return Value;
+  else if (Value >= 513)
+    // The last range is mapped to its lowest value.
+    return 513;
+  else if (InstProfPopcountll(Value) == 1)
+    // If it's a power of two, use it as is.
+    return Value;
+  else
+    // Otherwise, take to the previous power of two + 1.
+    return (1 << (64 - InstProfClzll(Value) - 1)) + 1;
+}
+
+/* Return true if the range that an (observed) memop size value belongs to has
+ * only a single value in the range.  For example, 0 -> true, 8 -> true, 10 ->
+ * false, 64 -> true, 100 -> false, 513 -> false. */
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned
+InstrProfIsSingleValRange(uint64_t Value) {
+  if (Value <= 8)
+    // The first ranges are individually tracked.
+    return 1;
+  else if (InstProfPopcountll(Value) == 1)
+    // If it's a power of two, there's only one value.
+    return 1;
+  else
+    // Otherwise, there's more than one value in the range.
+    return 0;
+}
+
+#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */
diff --git a/compiler-rt/lib/asan/asan_internal.h b/compiler-rt/lib/asan/asan_internal.h
index d4bfe996b664e..cfb54927c6cf4 100644
--- a/compiler-rt/lib/asan/asan_internal.h
+++ b/compiler-rt/lib/asan/asan_internal.h
@@ -118,8 +118,6 @@ void AppendToErrorMessageBuffer(const char *buffer);
 
 void *AsanDlSymNext(const char *sym);
 
-void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name);
-
 // Returns `true` iff most of ASan init process should be skipped due to the
 // ASan library being loaded via `dlopen()`. Platforms may perform any
 // `dlopen()` specific initialization inside this function.
diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp
index ce5e873dc5180..aa93bbd79d132 100644
--- a/compiler-rt/lib/asan/asan_linux.cpp
+++ b/compiler-rt/lib/asan/asan_linux.cpp
@@ -87,25 +87,12 @@ void *AsanDoesNotSupportStaticLinkage() {
   return &_DYNAMIC;  // defined in link.h
 }
 
-static void UnmapFromTo(uptr from, uptr to) {
-  CHECK(to >= from);
-  if (to == from) return;
-  uptr res = internal_munmap(reinterpret_cast(from), to - from);
-  if (UNLIKELY(internal_iserror(res))) {
-    Report(
-        "ERROR: AddresSanitizer failed to unmap 0x%zx (%zd) bytes at address "
-        "%p\n",
-        to - from, to - from, from);
-    CHECK("unable to unmap" && 0);
-  }
-}
-
 #if ASAN_PREMAP_SHADOW
-uptr FindPremappedShadowStart() {
+uptr FindPremappedShadowStart(uptr shadow_size_bytes) {
   uptr granularity = GetMmapGranularity();
   uptr shadow_start = reinterpret_cast(&__asan_shadow);
   uptr premap_shadow_size = PremapShadowSize();
-  uptr shadow_size = RoundUpTo(kHighShadowEnd, granularity);
+  uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity);
   // We may have mapped too much. Release extra memory.
   UnmapFromTo(shadow_start + shadow_size, shadow_start + premap_shadow_size);
   return shadow_start;
@@ -113,25 +100,14 @@ uptr FindPremappedShadowStart() {
 #endif
 
 uptr FindDynamicShadowStart() {
+  uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd);
 #if ASAN_PREMAP_SHADOW
   if (!PremapShadowFailed())
-    return FindPremappedShadowStart();
+    return FindPremappedShadowStart(shadow_size_bytes);
 #endif
 
-  uptr granularity = GetMmapGranularity();
-  uptr alignment = granularity * 8;
-  uptr left_padding = granularity;
-  uptr shadow_size = RoundUpTo(kHighShadowEnd, granularity);
-  uptr map_size = shadow_size + left_padding + alignment;
-
-  uptr map_start = (uptr)MmapNoAccess(map_size);
-  CHECK_NE(map_start, ~(uptr)0);
-
-  uptr shadow_start = RoundUpTo(map_start + left_padding, alignment);
-  UnmapFromTo(map_start, shadow_start - left_padding);
-  UnmapFromTo(shadow_start + shadow_size, map_start + map_size);
-
-  return shadow_start;
+  return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE,
+                          /*min_shadow_base_alignment*/ 0, kHighMemEnd);
 }
 
 void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp
index a8d3f5d3473c4..3182aacb0b5e9 100644
--- a/compiler-rt/lib/asan/asan_mac.cpp
+++ b/compiler-rt/lib/asan/asan_mac.cpp
@@ -55,46 +55,8 @@ void *AsanDoesNotSupportStaticLinkage() {
 }
 
 uptr FindDynamicShadowStart() {
-  uptr granularity = GetMmapGranularity();
-  uptr alignment = 8 * granularity;
-  uptr left_padding = granularity;
-  uptr space_size = kHighShadowEnd + left_padding;
-
-  uptr largest_gap_found = 0;
-  uptr max_occupied_addr = 0;
-  VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
-  uptr shadow_start =
-      FindAvailableMemoryRange(space_size, alignment, granularity,
-                               &largest_gap_found, &max_occupied_addr);
-  // If the shadow doesn't fit, restrict the address space to make it fit.
-  if (shadow_start == 0) {
-    VReport(
-        2,
-        "Shadow doesn't fit, largest_gap_found = %p, max_occupied_addr = %p\n",
-        largest_gap_found, max_occupied_addr);
-    uptr new_max_vm = RoundDownTo(largest_gap_found << SHADOW_SCALE, alignment);
-    if (new_max_vm < max_occupied_addr) {
-      Report("Unable to find a memory range for dynamic shadow.\n");
-      Report(
-          "space_size = %p, largest_gap_found = %p, max_occupied_addr = %p, "
-          "new_max_vm = %p\n",
-          space_size, largest_gap_found, max_occupied_addr, new_max_vm);
-      CHECK(0 && "cannot place shadow");
-    }
-    RestrictMemoryToMaxAddress(new_max_vm);
-    kHighMemEnd = new_max_vm - 1;
-    space_size = kHighShadowEnd + left_padding;
-    VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
-    shadow_start = FindAvailableMemoryRange(space_size, alignment, granularity,
-                                            nullptr, nullptr);
-    if (shadow_start == 0) {
-      Report("Unable to find a memory range after restricting VM.\n");
-      CHECK(0 && "cannot place shadow after restricting vm");
-    }
-  }
-  CHECK_NE((uptr)0, shadow_start);
-  CHECK(IsAligned(shadow_start, alignment));
-  return shadow_start;
+  return MapDynamicShadow(MemToShadowSize(kHighMemEnd), SHADOW_SCALE,
+                          /*min_shadow_base_alignment*/ 0, kHighMemEnd);
 }
 
 // No-op. Mac does not support static linkage anyway.
diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h
index 41fb49ee46d46..27598171fc29b 100644
--- a/compiler-rt/lib/asan/asan_mapping.h
+++ b/compiler-rt/lib/asan/asan_mapping.h
@@ -206,6 +206,8 @@ static const u64 kMyriadCacheBitMask32 = 0x40000000ULL;
 #else
 #  if SANITIZER_IOS
 #    define SHADOW_OFFSET __asan_shadow_memory_dynamic_address
+#  elif SANITIZER_MAC && defined(__aarch64__)
+#    define SHADOW_OFFSET __asan_shadow_memory_dynamic_address
 #  elif defined(__aarch64__)
 #    define SHADOW_OFFSET kAArch64_ShadowOffset64
 #  elif defined(__powerpc64__)
@@ -355,6 +357,8 @@ static inline bool AddrIsInShadowGap(uptr a) {
 
 namespace __asan {
 
+static inline uptr MemToShadowSize(uptr size) { return size >> SHADOW_SCALE; }
+
 static inline bool AddrIsInMem(uptr a) {
   PROFILE_ASAN_MAPPING();
   return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a) ||
diff --git a/compiler-rt/lib/asan/asan_premap_shadow.cpp b/compiler-rt/lib/asan/asan_premap_shadow.cpp
index 7835e99748ffa..666bb9b34bd39 100644
--- a/compiler-rt/lib/asan/asan_premap_shadow.cpp
+++ b/compiler-rt/lib/asan/asan_premap_shadow.cpp
@@ -32,22 +32,8 @@ uptr PremapShadowSize() {
 // Returns an address aligned to 8 pages, such that one page on the left and
 // PremapShadowSize() bytes on the right of it are mapped r/o.
 uptr PremapShadow() {
-  uptr granularity = GetMmapGranularity();
-  uptr alignment = granularity * 8;
-  uptr left_padding = granularity;
-  uptr shadow_size = PremapShadowSize();
-  uptr map_size = shadow_size + left_padding + alignment;
-
-  uptr map_start = (uptr)MmapNoAccess(map_size);
-  CHECK_NE(map_start, ~(uptr)0);
-
-  uptr shadow_start = RoundUpTo(map_start + left_padding, alignment);
-  uptr shadow_end = shadow_start + shadow_size;
-  internal_munmap(reinterpret_cast(map_start),
-                  shadow_start - left_padding - map_start);
-  internal_munmap(reinterpret_cast(shadow_end),
-                  map_start + map_size - shadow_end);
-  return shadow_start;
+  return MapDynamicShadow(PremapShadowSize(), /*mmap_alignment_scale*/ 3,
+                          /*min_shadow_base_alignment*/ 0, kHighMemEnd);
 }
 
 bool PremapShadowFailed() {
diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp
index 463bfa02f9f16..115733cdaa48e 100644
--- a/compiler-rt/lib/asan/asan_rtl.cpp
+++ b/compiler-rt/lib/asan/asan_rtl.cpp
@@ -319,7 +319,7 @@ static void InitializeHighMemEnd() {
   kHighMemEnd = GetMaxUserVirtualAddress();
   // Increase kHighMemEnd to make sure it's properly
   // aligned together with kHighMemBeg:
-  kHighMemEnd |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
+  kHighMemEnd |= (GetMmapGranularity() << SHADOW_SCALE) - 1;
 #endif  // !ASAN_FIXED_MAPPING
   CHECK_EQ((kHighMemBeg % GetMmapGranularity()), 0);
 #endif  // !SANITIZER_MYRIAD2
diff --git a/compiler-rt/lib/asan/asan_shadow_setup.cpp b/compiler-rt/lib/asan/asan_shadow_setup.cpp
index 17324932a86f9..0e2623a23028e 100644
--- a/compiler-rt/lib/asan/asan_shadow_setup.cpp
+++ b/compiler-rt/lib/asan/asan_shadow_setup.cpp
@@ -22,24 +22,6 @@
 
 namespace __asan {
 
-// ---------------------- mmap -------------------- {{{1
-// Reserve memory range [beg, end].
-// We need to use inclusive range because end+1 may not be representable.
-void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
-  CHECK_EQ((beg % GetMmapGranularity()), 0);
-  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
-  uptr size = end - beg + 1;
-  DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
-  if (!MmapFixedSuperNoReserve(beg, size, name)) {
-    Report(
-        "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
-        "Perhaps you're using ulimit -v\n",
-        size);
-    Abort();
-  }
-  if (common_flags()->use_madv_dontdump) DontDumpShadowMemory(beg, size);
-}
-
 static void ProtectGap(uptr addr, uptr size) {
   if (!flags()->protect_shadow_gap) {
     // The shadow gap is unprotected, so there is a chance that someone
@@ -57,26 +39,8 @@ static void ProtectGap(uptr addr, uptr size) {
                              "unprotected gap shadow");
     return;
   }
-  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
-  if (addr == (uptr)res) return;
-  // A few pages at the start of the address space can not be protected.
-  // But we really want to protect as much as possible, to prevent this memory
-  // being returned as a result of a non-FIXED mmap().
-  if (addr == kZeroBaseShadowStart) {
-    uptr step = GetMmapGranularity();
-    while (size > step && addr < kZeroBaseMaxShadowStart) {
-      addr += step;
-      size -= step;
-      void *res = MmapFixedNoAccess(addr, size, "shadow gap");
-      if (addr == (uptr)res) return;
-    }
-  }
-
-  Report(
-      "ERROR: Failed to protect the shadow gap. "
-      "ASan cannot proceed correctly. ABORTING.\n");
-  DumpProcessMap();
-  Die();
+  __sanitizer::ProtectGap(addr, size, kZeroBaseShadowStart,
+                          kZeroBaseMaxShadowStart);
 }
 
 static void MaybeReportLinuxPIEBug() {
@@ -99,8 +63,6 @@ void InitializeShadowMemory() {
   // |kDefaultShadowSentinel|.
   bool full_shadow_is_available = false;
   if (shadow_start == kDefaultShadowSentinel) {
-    __asan_shadow_memory_dynamic_address = 0;
-    CHECK_EQ(0, kLowShadowBeg);
     shadow_start = FindDynamicShadowStart();
     if (SANITIZER_LINUX) full_shadow_is_available = true;
   }
diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp
index 03feddbe86b44..fe635c2d5b6b4 100644
--- a/compiler-rt/lib/asan/asan_win.cpp
+++ b/compiler-rt/lib/asan/asan_win.cpp
@@ -247,15 +247,8 @@ void *AsanDoesNotSupportStaticLinkage() {
 }
 
 uptr FindDynamicShadowStart() {
-  uptr granularity = GetMmapGranularity();
-  uptr alignment = 8 * granularity;
-  uptr left_padding = granularity;
-  uptr space_size = kHighShadowEnd + left_padding;
-  uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
-                                               granularity, nullptr, nullptr);
-  CHECK_NE((uptr)0, shadow_start);
-  CHECK(IsAligned(shadow_start, alignment));
-  return shadow_start;
+  return MapDynamicShadow(MemToShadowSize(kHighMemEnd), SHADOW_SCALE,
+                          /*min_shadow_base_alignment*/ 0, kHighMemEnd);
 }
 
 void AsanCheckDynamicRTPrereqs() {}
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 5e3c901322ec6..058bfc815a1a6 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -161,6 +161,8 @@ set(GENERIC_SOURCES
   umodti3.c
 )
 
+# TODO: Several "tf" files (and divtc3.c, but not multc3.c) are in
+# GENERIC_SOURCES instead of here.
 set(GENERIC_TF_SOURCES
   addtf3.c
   comparetf2.c
@@ -234,9 +236,21 @@ if (NOT FUCHSIA)
   )
 endif()
 
-# These sources work on all x86 variants, but only x86 variants.
+# These files are used on 32-bit and 64-bit x86.
 set(x86_ARCH_SOURCES
   cpu_model.c
+  )
+
+if (NOT MSVC)
+  set(x86_ARCH_SOURCES
+    ${x86_ARCH_SOURCES}
+    i386/fp_mode.c
+  )
+endif ()
+
+# Implement extended-precision builtins, assuming long double is 80 bits.
+# long double is not 80 bits on Android or MSVC.
+set(x86_80_BIT_SOURCES
   divxc3.c
   fixxfdi.c
   fixxfti.c
@@ -251,24 +265,27 @@ set(x86_ARCH_SOURCES
   powixf2.c
 )
 
-if (NOT MSVC)
-  set(x86_ARCH_SOURCES
-    ${x86_ARCH_SOURCES}
-    i386/fp_mode.c
-  )
-endif ()
-
 if (NOT MSVC)
   set(x86_64_SOURCES
+    ${GENERIC_SOURCES}
     ${GENERIC_TF_SOURCES}
+    ${x86_ARCH_SOURCES}
     x86_64/floatdidf.c
     x86_64/floatdisf.c
-    x86_64/floatdixf.c
     x86_64/floatundidf.S
     x86_64/floatundisf.S
-    x86_64/floatundixf.S
   )
-  filter_builtin_sources(x86_64_SOURCES EXCLUDE x86_64_SOURCES "${x86_64_SOURCES};${GENERIC_SOURCES}")
+
+  if (NOT ANDROID)
+    set(x86_64_SOURCES
+      ${x86_64_SOURCES}
+      ${x86_80_BIT_SOURCES}
+      x86_64/floatdixf.c
+      x86_64/floatundixf.S
+    )
+  endif()
+
+  # Darwin x86_64 Haswell
   set(x86_64h_SOURCES ${x86_64_SOURCES})
 
   if (WIN32)
@@ -280,22 +297,30 @@ if (NOT MSVC)
   endif()
 
   set(i386_SOURCES
+    ${GENERIC_SOURCES}
+    ${x86_ARCH_SOURCES}
     i386/ashldi3.S
     i386/ashrdi3.S
     i386/divdi3.S
     i386/floatdidf.S
     i386/floatdisf.S
-    i386/floatdixf.S
     i386/floatundidf.S
     i386/floatundisf.S
-    i386/floatundixf.S
     i386/lshrdi3.S
     i386/moddi3.S
     i386/muldi3.S
     i386/udivdi3.S
     i386/umoddi3.S
   )
-  filter_builtin_sources(i386_SOURCES EXCLUDE i386_SOURCES "${i386_SOURCES};${GENERIC_SOURCES}")
+
+  if (NOT ANDROID)
+    set(i386_SOURCES
+      ${i386_SOURCES}
+      ${x86_80_BIT_SOURCES}
+      i386/floatdixf.S
+      i386/floatundixf.S
+    )
+  endif()
 
   if (WIN32)
     set(i386_SOURCES
@@ -309,20 +334,14 @@ else () # MSVC
   # MSVC's assembler takes Intel syntax, not AT&T syntax.
   # Also use only MSVC compilable builtin implementations.
   set(x86_64_SOURCES
+    ${GENERIC_SOURCES}
+    ${x86_ARCH_SOURCES}
     x86_64/floatdidf.c
     x86_64/floatdisf.c
-    x86_64/floatdixf.c
-    ${GENERIC_SOURCES}
   )
-  set(x86_64h_SOURCES ${x86_64_SOURCES})
-  set(i386_SOURCES ${GENERIC_SOURCES})
+  set(i386_SOURCES ${GENERIC_SOURCES} ${x86_ARCH_SOURCES})
 endif () # if (NOT MSVC)
 
-set(x86_64h_SOURCES ${x86_64h_SOURCES} ${x86_ARCH_SOURCES})
-set(x86_64_SOURCES ${x86_64_SOURCES} ${x86_ARCH_SOURCES})
-set(i386_SOURCES ${i386_SOURCES} ${x86_ARCH_SOURCES})
-set(i686_SOURCES ${i686_SOURCES} ${x86_ARCH_SOURCES})
-
 set(arm_SOURCES
   arm/fp_mode.c
   arm/bswapdi2.S
@@ -356,8 +375,8 @@ set(arm_SOURCES
   arm/udivmodsi4.S
   arm/udivsi3.S
   arm/umodsi3.S
+  ${GENERIC_SOURCES}
 )
-filter_builtin_sources(arm_SOURCES EXCLUDE arm_SOURCES "${arm_SOURCES};${GENERIC_SOURCES}")
 
 set(thumb1_SOURCES
   arm/divsi3.S
@@ -451,8 +470,8 @@ if(MINGW)
     arm/aeabi_uldivmod.S
     arm/chkstk.S
     mingw_fixfloat.c
+    ${GENERIC_SOURCES}
   )
-  filter_builtin_sources(arm_SOURCES EXCLUDE arm_SOURCES "${arm_SOURCES};${GENERIC_SOURCES}")
 elseif(NOT WIN32)
   # TODO the EABI sources should only be added to EABI targets
   set(arm_SOURCES
@@ -619,25 +638,8 @@ else ()
         endif()
       endif()
 
-      # Filter out generic versions of routines that are re-implemented in
-      # architecture specific manner.  This prevents multiple definitions of the
-      # same symbols, making the symbol selection non-deterministic.
-      foreach (_file ${${arch}_SOURCES})
-        get_filename_component(_file_dir "${_file}" DIRECTORY)
-        if (NOT "${_file_dir}" STREQUAL "")
-          # Architecture specific file. We follow the convention that a source
-          # file that exists in a sub-directory (e.g. `ppc/divtc3.c`) is
-          # architecture specific and that if a generic implementation exists
-          # it will be a top-level source file with the same name modulo the
-          # file extension (e.g. `divtc3.c`).
-          get_filename_component(_name ${_file} NAME)
-          string(REPLACE ".S" ".c" _cname "${_name}")
-          if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_cname}")
-            message(STATUS "For ${arch} builtins preferring ${_file} to ${_cname}")
-            list(REMOVE_ITEM ${arch}_SOURCES ${_cname})
-          endif()
-        endif ()
-      endforeach ()
+      # Remove a generic C builtin when an arch-specific builtin is specified.
+      filter_builtin_sources(${arch}_SOURCES ${arch})
 
       # Needed for clear_cache on debug mode, due to r7's usage in inline asm.
       # Release mode already sets it via -O2/3, Debug mode doesn't.
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index 042657232d8e8..8346bb62dcfb4 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -272,12 +272,17 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
   }
 }
 
-static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
-                                            const unsigned *Features,
-                                            unsigned *Type, unsigned *Subtype) {
+static const char *
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                const unsigned *Features,
+                                unsigned *Type, unsigned *Subtype) {
 #define testFeature(F)                                                         \
   (Features[F / 32] & (F % 32)) != 0
 
+  // We select CPU strings to match the code in Host.cpp, but we don't use them
+  // in compiler-rt.
+  const char *CPU = 0;
+
   switch (Family) {
   case 6:
     switch (Model) {
@@ -288,13 +293,17 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                // 0Fh. All processors are manufactured using the 65 nm process.
     case 0x16: // Intel Celeron processor model 16h. All processors are
                // manufactured using the 65 nm process
+      CPU = "core2";
+      *Type = INTEL_CORE2;
+      break;
     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
                // 17h. All processors are manufactured using the 45 nm process.
                //
                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
                // the 45 nm process.
-      *Type = INTEL_CORE2; // "penryn"
+      CPU = "penryn";
+      *Type = INTEL_CORE2;
       break;
     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
                // processors are manufactured using the 45 nm process.
@@ -302,25 +311,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                // As found in a Summer 2010 model iMac.
     case 0x1f:
     case 0x2e:              // Nehalem EX
-      *Type = INTEL_COREI7; // "nehalem"
+      CPU = "nehalem";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_NEHALEM;
       break;
     case 0x25: // Intel Core i7, laptop version.
     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
                // processors are manufactured using the 32 nm process.
     case 0x2f: // Westmere EX
-      *Type = INTEL_COREI7; // "westmere"
+      CPU = "westmere";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_WESTMERE;
       break;
     case 0x2a: // Intel Core i7 processor. All processors are manufactured
                // using the 32 nm process.
     case 0x2d:
-      *Type = INTEL_COREI7; //"sandybridge"
+      CPU = "sandybridge";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_SANDYBRIDGE;
       break;
     case 0x3a:
     case 0x3e:              // Ivy Bridge EP
-      *Type = INTEL_COREI7; // "ivybridge"
+      CPU = "ivybridge";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_IVYBRIDGE;
       break;
 
@@ -329,7 +342,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x3f:
     case 0x45:
     case 0x46:
-      *Type = INTEL_COREI7; // "haswell"
+      CPU = "haswell";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_HASWELL;
       break;
 
@@ -338,7 +352,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x47:
     case 0x4f:
     case 0x56:
-      *Type = INTEL_COREI7; // "broadwell"
+      CPU = "broadwell";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_BROADWELL;
       break;
 
@@ -349,39 +364,47 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x9e:              // Kaby Lake desktop
     case 0xa5:              // Comet Lake-H/S
     case 0xa6:              // Comet Lake-U
-      *Type = INTEL_COREI7; // "skylake"
+      CPU = "skylake";
+      *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_SKYLAKE;
       break;
 
     // Skylake Xeon:
     case 0x55:
       *Type = INTEL_COREI7;
-      if (testFeature(FEATURE_AVX512BF16))
-        *Subtype = INTEL_COREI7_COOPERLAKE; // "cooperlake"
-      else if (testFeature(FEATURE_AVX512VNNI))
-        *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake"
-      else
-        *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+      if (testFeature(FEATURE_AVX512BF16)) {
+        CPU = "cooperlake";
+        *Subtype = INTEL_COREI7_COOPERLAKE;
+      } else if (testFeature(FEATURE_AVX512VNNI)) {
+        CPU = "cascadelake";
+        *Subtype = INTEL_COREI7_CASCADELAKE;
+      } else {
+        CPU = "skylake-avx512";
+        *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+      }
       break;
 
     // Cannonlake:
     case 0x66:
+      CPU = "cannonlake";
       *Type = INTEL_COREI7;
-      *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
+      *Subtype = INTEL_COREI7_CANNONLAKE;
       break;
 
     // Icelake:
     case 0x7d:
     case 0x7e:
+      CPU = "icelake-client";
       *Type = INTEL_COREI7;
-      *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
+      *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
       break;
 
     // Icelake Xeon:
     case 0x6a:
     case 0x6c:
+      CPU = "icelake-server";
       *Type = INTEL_COREI7;
-      *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
+      *Subtype = INTEL_COREI7_ICELAKE_SERVER;
       break;
 
     case 0x1c: // Most 45 nm Intel Atom processors
@@ -389,8 +412,9 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x27: // 32 nm Atom Medfield
     case 0x35: // 32 nm Atom Midview
     case 0x36: // 32 nm Atom Midview
+      CPU = "bonnell";
       *Type = INTEL_BONNELL;
-      break; // "bonnell"
+      break;
 
     // Atom Silvermont codes from the Intel software optimization guide.
     case 0x37:
@@ -399,26 +423,32 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x5a:
     case 0x5d:
     case 0x4c: // really airmont
+      CPU = "silvermont";
       *Type = INTEL_SILVERMONT;
-      break; // "silvermont"
+      break;
     // Goldmont:
     case 0x5c: // Apollo Lake
     case 0x5f: // Denverton
+      CPU = "goldmont";
       *Type = INTEL_GOLDMONT;
       break; // "goldmont"
     case 0x7a:
+      CPU = "goldmont-plus";
       *Type = INTEL_GOLDMONT_PLUS;
       break;
     case 0x86:
+      CPU = "tremont";
       *Type = INTEL_TREMONT;
       break;
 
     case 0x57:
-      *Type = INTEL_KNL; // knl
+      CPU = "knl";
+      *Type = INTEL_KNL;
       break;
 
     case 0x85:
-      *Type = INTEL_KNM; // knm
+      CPU = "knm";
+      *Type = INTEL_KNM;
       break;
 
     default: // Unknown family 6 CPU.
@@ -428,17 +458,22 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
   default:
     break; // Unknown.
   }
+
+  return CPU;
 }
 
-static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
-                                          const unsigned *Features,
-                                          unsigned *Type, unsigned *Subtype) {
-  // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
-  // appears to be no way to generate the wide variety of AMD-specific targets
-  // from the information returned from CPUID.
+static const char *
+getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                              const unsigned *Features,
+                              unsigned *Type, unsigned *Subtype) {
+  // We select CPU strings to match the code in Host.cpp, but we don't use them
+  // in compiler-rt.
+  const char *CPU = 0;
+
   switch (Family) {
   case 16:
-    *Type = AMDFAM10H; // "amdfam10"
+    CPU = "amdfam10";
+    *Type = AMDFAM10H;
     switch (Model) {
     case 2:
       *Subtype = AMDFAM10H_BARCELONA;
@@ -452,44 +487,54 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     }
     break;
   case 20:
+    CPU = "btver1";
     *Type = AMD_BTVER1;
-    break; // "btver1";
+    break;
   case 21:
+    CPU = "bdver1";
     *Type = AMDFAM15H;
     if (Model >= 0x60 && Model <= 0x7f) {
+      CPU = "bdver4";
       *Subtype = AMDFAM15H_BDVER4;
-      break; // "bdver4"; 60h-7Fh: Excavator
+      break; // 60h-7Fh: Excavator
     }
     if (Model >= 0x30 && Model <= 0x3f) {
+      CPU = "bdver3";
       *Subtype = AMDFAM15H_BDVER3;
-      break; // "bdver3"; 30h-3Fh: Steamroller
+      break; // 30h-3Fh: Steamroller
     }
     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
+      CPU = "bdver2";
       *Subtype = AMDFAM15H_BDVER2;
-      break; // "bdver2"; 02h, 10h-1Fh: Piledriver
+      break; // 02h, 10h-1Fh: Piledriver
     }
     if (Model <= 0x0f) {
       *Subtype = AMDFAM15H_BDVER1;
-      break; // "bdver1"; 00h-0Fh: Bulldozer
+      break; // 00h-0Fh: Bulldozer
     }
     break;
   case 22:
+    CPU = "btver2";
     *Type = AMD_BTVER2;
-    break; // "btver2"
+    break;
   case 23:
+    CPU = "znver1";
     *Type = AMDFAM17H;
     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
+      CPU = "znver2";
       *Subtype = AMDFAM17H_ZNVER2;
-      break; // "znver2"; 30h-3fh, 71h: Zen2
+      break; // 30h-3fh, 71h: Zen2
     }
     if (Model <= 0x0f) {
       *Subtype = AMDFAM17H_ZNVER1;
-      break; // "znver1"; 00h-0Fh: Zen1
+      break; // 00h-0Fh: Zen1
     }
     break;
   default:
-    break; // "generic"
+    break; // Unknown AMD CPU.
   }
+
+  return CPU;
 }
 
 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h
index a575c07456561..705355a4840d4 100644
--- a/compiler-rt/lib/builtins/int_types.h
+++ b/compiler-rt/lib/builtins/int_types.h
@@ -144,9 +144,12 @@ typedef struct {
 // Check if the target supports 80 bit extended precision long doubles.
 // Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC
 // still makes it 80 bits. Clang will match whatever compiler it is trying to
-// be compatible with.
-#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) ||      \
-    defined(__m68k__) || defined(__ia64__)
+// be compatible with. On 32-bit x86 Android, long double is 64 bits, while on
+// x86_64 Android, long double is 128 bits.
+#if (defined(__i386__) || defined(__x86_64__)) &&                              \
+    !(defined(_MSC_VER) || defined(__ANDROID__))
+#define HAS_80_BIT_LONG_DOUBLE 1
+#elif defined(__m68k__) || defined(__ia64__)
 #define HAS_80_BIT_LONG_DOUBLE 1
 #else
 #define HAS_80_BIT_LONG_DOUBLE 0
diff --git a/compiler-rt/lib/fuzzer/FuzzerBuiltins.h b/compiler-rt/lib/fuzzer/FuzzerBuiltins.h
index 5f1ccef8a9cad..4c0ada82662dd 100644
--- a/compiler-rt/lib/fuzzer/FuzzerBuiltins.h
+++ b/compiler-rt/lib/fuzzer/FuzzerBuiltins.h
@@ -11,7 +11,7 @@
 #ifndef LLVM_FUZZER_BUILTINS_H
 #define LLVM_FUZZER_BUILTINS_H
 
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 
 #if !LIBFUZZER_MSVC
 #include 
diff --git a/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h b/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h
index bc65c60098be9..c5bec9787d8ed 100644
--- a/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h
+++ b/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h
@@ -12,7 +12,7 @@
 #ifndef LLVM_FUZZER_BUILTINS_MSVC_H
 #define LLVM_FUZZER_BUILTINS_MSVC_H
 
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 
 #if LIBFUZZER_MSVC
 #include 
diff --git a/compiler-rt/lib/fuzzer/FuzzerDefs.h b/compiler-rt/lib/fuzzer/FuzzerDefs.h
index 5793e86aa804b..1a2752af2f4d5 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDefs.h
+++ b/compiler-rt/lib/fuzzer/FuzzerDefs.h
@@ -21,156 +21,6 @@
 #include 
 
 
-// Platform detection.
-#ifdef __linux__
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 1
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif __APPLE__
-#define LIBFUZZER_APPLE 1
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif __NetBSD__
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 1
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif __FreeBSD__
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 1
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif __OpenBSD__
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 1
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif _WIN32
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 1
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif __Fuchsia__
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 1
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 0
-#elif __EMSCRIPTEN__
-#define LIBFUZZER_APPLE 0
-#define LIBFUZZER_FUCHSIA 0
-#define LIBFUZZER_LINUX 0
-#define LIBFUZZER_NETBSD 0
-#define LIBFUZZER_FREEBSD 0
-#define LIBFUZZER_OPENBSD 0
-#define LIBFUZZER_WINDOWS 0
-#define LIBFUZZER_EMSCRIPTEN 1
-#else
-#error "Support for your platform has not been implemented"
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-// MSVC compiler is being used.
-#define LIBFUZZER_MSVC 1
-#else
-#define LIBFUZZER_MSVC 0
-#endif
-
-#ifndef __has_attribute
-#  define __has_attribute(x) 0
-#endif
-
-#define LIBFUZZER_POSIX                                                        \
-  (LIBFUZZER_APPLE || LIBFUZZER_LINUX || LIBFUZZER_NETBSD ||                   \
-   LIBFUZZER_FREEBSD || LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN)
-
-#ifdef __x86_64
-#  if __has_attribute(target)
-#    define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt")))
-#  else
-#    define ATTRIBUTE_TARGET_POPCNT
-#  endif
-#else
-#  define ATTRIBUTE_TARGET_POPCNT
-#endif
-
-
-#ifdef __clang__  // avoid gcc warning.
-#  if __has_attribute(no_sanitize)
-#    define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
-#  else
-#    define ATTRIBUTE_NO_SANITIZE_MEMORY
-#  endif
-#  define ALWAYS_INLINE __attribute__((always_inline))
-#else
-#  define ATTRIBUTE_NO_SANITIZE_MEMORY
-#  define ALWAYS_INLINE
-#endif // __clang__
-
-#if LIBFUZZER_WINDOWS
-#define ATTRIBUTE_NO_SANITIZE_ADDRESS
-#else
-#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
-#endif
-
-#if LIBFUZZER_WINDOWS
-#define ATTRIBUTE_ALIGNED(X) __declspec(align(X))
-#define ATTRIBUTE_INTERFACE __declspec(dllexport)
-// This is used for __sancov_lowest_stack which is needed for
-// -fsanitize-coverage=stack-depth. That feature is not yet available on
-// Windows, so make the symbol static to avoid linking errors.
-#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC static
-#define ATTRIBUTE_NOINLINE __declspec(noinline)
-#else
-#define ATTRIBUTE_ALIGNED(X) __attribute__((aligned(X)))
-#define ATTRIBUTE_INTERFACE __attribute__((visibility("default")))
-#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC \
-  ATTRIBUTE_INTERFACE __attribute__((tls_model("initial-exec"))) thread_local
-
-#define ATTRIBUTE_NOINLINE __attribute__((noinline))
-#endif
-
-#if defined(__has_feature)
-#  if __has_feature(address_sanitizer)
-#    define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS
-#  elif __has_feature(memory_sanitizer)
-#    define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY
-#  else
-#    define ATTRIBUTE_NO_SANITIZE_ALL
-#  endif
-#else
-#  define ATTRIBUTE_NO_SANITIZE_ALL
-#endif
-
 namespace fuzzer {
 
 template  T Min(T a, T b) { return a < b ? a : b; }
diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index 1a0b2580c5b71..00a33a413d2f3 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -16,6 +16,7 @@
 #include "FuzzerInternal.h"
 #include "FuzzerMerge.h"
 #include "FuzzerMutate.h"
+#include "FuzzerPlatform.h"
 #include "FuzzerRandom.h"
 #include "FuzzerTracePC.h"
 #include 
@@ -32,7 +33,11 @@
 // binary can test for its existence.
 #if LIBFUZZER_MSVC
 extern "C" void __libfuzzer_is_present() {}
+#if defined(_M_IX86) || defined(__i386__)
+#pragma comment(linker, "/include:___libfuzzer_is_present")
+#else
 #pragma comment(linker, "/include:__libfuzzer_is_present")
+#endif
 #else
 extern "C" __attribute__((used)) void __libfuzzer_is_present() {}
 #endif  // LIBFUZZER_MSVC
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp b/compiler-rt/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp
index dcd7134594870..95233d2a10d37 100644
--- a/compiler-rt/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp
@@ -10,7 +10,7 @@
 // requires that clients of LibFuzzer pass ``--export-dynamic`` to the linker.
 // That is a complication we don't wish to expose to clients right now.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_APPLE
 
 #include "FuzzerExtFunctions.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWeak.cpp b/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWeak.cpp
index d56dab36c646c..24ddc57d47d6b 100644
--- a/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWeak.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWeak.cpp
@@ -11,7 +11,7 @@
 // weak symbols to be undefined. That is a complication we don't want to expose
 // to clients right now.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || LIBFUZZER_FUCHSIA ||                \
     LIBFUZZER_FREEBSD || LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN
 
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWindows.cpp
index 55efe8f80e908..688bad1d51ca5 100644
--- a/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerExtFunctionsWindows.cpp
@@ -9,7 +9,7 @@
 // compiled with MSVC. Uses weak aliases when compiled with clang. Unfortunately
 // the method each compiler supports is not supported by the other.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_WINDOWS
 
 #include "FuzzerExtFunctions.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp b/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp
index b2face778203e..d36beba1b1ba9 100644
--- a/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp
@@ -8,7 +8,8 @@
 // Extra coverage counters defined by user code.
 //===----------------------------------------------------------------------===//
 
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
+#include 
 
 #if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || LIBFUZZER_FREEBSD ||                \
     LIBFUZZER_OPENBSD || LIBFUZZER_FUCHSIA || LIBFUZZER_EMSCRIPTEN
diff --git a/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp b/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp
index fcd9b8d8b9c7e..aac85b08727ab 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // IO functions implementation using Posix API.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA
 
 #include "FuzzerExtFunctions.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp
index 56757aa09a370..651283a551cf0 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // IO functions implementation for Windows.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_WINDOWS
 
 #include "FuzzerExtFunctions.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index 7c3288fc57502..02db6d27b0a3e 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -12,6 +12,7 @@
 #include "FuzzerIO.h"
 #include "FuzzerInternal.h"
 #include "FuzzerMutate.h"
+#include "FuzzerPlatform.h"
 #include "FuzzerRandom.h"
 #include "FuzzerTracePC.h"
 #include 
diff --git a/compiler-rt/lib/fuzzer/FuzzerMain.cpp b/compiler-rt/lib/fuzzer/FuzzerMain.cpp
index 771a34aed3167..75f2f8e75c9bd 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMain.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerMain.cpp
@@ -9,6 +9,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 
 extern "C" {
 // This function should be defined by the user.
diff --git a/compiler-rt/lib/fuzzer/FuzzerPlatform.h b/compiler-rt/lib/fuzzer/FuzzerPlatform.h
new file mode 100644
index 0000000000000..8befdb882cc6e
--- /dev/null
+++ b/compiler-rt/lib/fuzzer/FuzzerPlatform.h
@@ -0,0 +1,163 @@
+//===-- FuzzerPlatform.h --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Common platform macros.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_PLATFORM_H
+#define LLVM_FUZZER_PLATFORM_H
+
+// Platform detection.
+#ifdef __linux__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 1
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif __APPLE__
+#define LIBFUZZER_APPLE 1
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif __NetBSD__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 1
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif __FreeBSD__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 1
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif __OpenBSD__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 1
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif _WIN32
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 1
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif __Fuchsia__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 1
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 0
+#elif __EMSCRIPTEN__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_FUCHSIA 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#define LIBFUZZER_WINDOWS 0
+#define LIBFUZZER_EMSCRIPTEN 1
+#else
+#error "Support for your platform has not been implemented"
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC compiler is being used.
+#define LIBFUZZER_MSVC 1
+#else
+#define LIBFUZZER_MSVC 0
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#define LIBFUZZER_POSIX                                                        \
+  (LIBFUZZER_APPLE || LIBFUZZER_LINUX || LIBFUZZER_NETBSD ||                   \
+   LIBFUZZER_FREEBSD || LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN)
+
+#ifdef __x86_64
+#if __has_attribute(target)
+#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt")))
+#else
+#define ATTRIBUTE_TARGET_POPCNT
+#endif
+#else
+#define ATTRIBUTE_TARGET_POPCNT
+#endif
+
+#ifdef __clang__ // avoid gcc warning.
+#if __has_attribute(no_sanitize)
+#define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
+#else
+#define ATTRIBUTE_NO_SANITIZE_MEMORY
+#endif
+#define ALWAYS_INLINE __attribute__((always_inline))
+#else
+#define ATTRIBUTE_NO_SANITIZE_MEMORY
+#define ALWAYS_INLINE
+#endif // __clang__
+
+#if LIBFUZZER_WINDOWS
+#define ATTRIBUTE_NO_SANITIZE_ADDRESS
+#else
+#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
+#endif
+
+#if LIBFUZZER_WINDOWS
+#define ATTRIBUTE_ALIGNED(X) __declspec(align(X))
+#define ATTRIBUTE_INTERFACE __declspec(dllexport)
+// This is used for __sancov_lowest_stack which is needed for
+// -fsanitize-coverage=stack-depth. That feature is not yet available on
+// Windows, so make the symbol static to avoid linking errors.
+#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC static
+#define ATTRIBUTE_NOINLINE __declspec(noinline)
+#else
+#define ATTRIBUTE_ALIGNED(X) __attribute__((aligned(X)))
+#define ATTRIBUTE_INTERFACE __attribute__((visibility("default")))
+#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC                                  \
+  ATTRIBUTE_INTERFACE __attribute__((tls_model("initial-exec"))) thread_local
+
+#define ATTRIBUTE_NOINLINE __attribute__((noinline))
+#endif
+
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS
+#elif __has_feature(memory_sanitizer)
+#define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY
+#else
+#define ATTRIBUTE_NO_SANITIZE_ALL
+#endif
+#else
+#define ATTRIBUTE_NO_SANITIZE_ALL
+#endif
+
+#endif // LLVM_FUZZER_PLATFORM_H
diff --git a/compiler-rt/lib/fuzzer/FuzzerSHA1.cpp b/compiler-rt/lib/fuzzer/FuzzerSHA1.cpp
index 43e5e78cd7877..2005dc7003053 100644
--- a/compiler-rt/lib/fuzzer/FuzzerSHA1.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerSHA1.cpp
@@ -17,6 +17,7 @@
 
 #include "FuzzerSHA1.h"
 #include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 
 /* This code is public-domain - it is based on libcrypt
  * placed in the public domain by Wei Dai and other contributors.
diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp
index 86649f9e095cb..b2ca7693e540e 100644
--- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp
@@ -19,6 +19,7 @@
 #include "FuzzerDictionary.h"
 #include "FuzzerExtFunctions.h"
 #include "FuzzerIO.h"
+#include "FuzzerPlatform.h"
 #include "FuzzerUtil.h"
 #include "FuzzerValueBitMap.h"
 #include 
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilDarwin.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilDarwin.cpp
index d449bc248f095..a5bed658a446c 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilDarwin.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilDarwin.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // Misc utils for Darwin.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_APPLE
 #include "FuzzerCommand.h"
 #include "FuzzerIO.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp
index 12239c6e1b3e3..190fb7866649e 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // Misc utils implementation using Fuchsia/Zircon APIs.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 
 #if LIBFUZZER_FUCHSIA
 
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp
index 993023e703931..95490b992e0bc 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // Misc utils for Linux.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || LIBFUZZER_FREEBSD ||                \
     LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN
 #include "FuzzerCommand.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp
index 48073cfda3749..fc57b724db108 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // Misc utils implementation using Posix API.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_POSIX
 #include "FuzzerIO.h"
 #include "FuzzerInternal.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
index b86306afddb6d..6c693e3d7eea3 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // Misc utils implementation for Windows.
 //===----------------------------------------------------------------------===//
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
 #if LIBFUZZER_WINDOWS
 #include "FuzzerCommand.h"
 #include "FuzzerIO.h"
diff --git a/compiler-rt/lib/fuzzer/FuzzerValueBitMap.h b/compiler-rt/lib/fuzzer/FuzzerValueBitMap.h
index bc039f1df27f3..ddbfe200af9c6 100644
--- a/compiler-rt/lib/fuzzer/FuzzerValueBitMap.h
+++ b/compiler-rt/lib/fuzzer/FuzzerValueBitMap.h
@@ -11,7 +11,8 @@
 #ifndef LLVM_FUZZER_VALUE_BIT_MAP_H
 #define LLVM_FUZZER_VALUE_BIT_MAP_H
 
-#include "FuzzerDefs.h"
+#include "FuzzerPlatform.h"
+#include 
 
 namespace fuzzer {
 
diff --git a/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp b/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp
index bb0aad224a14c..92eb293dab499 100644
--- a/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp
+++ b/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp
@@ -23,6 +23,14 @@ size_t Backtrace(uintptr_t *TraceBuffer, size_t Size) {
   return backtrace(reinterpret_cast(TraceBuffer), Size);
 }
 
+// We don't need any custom handling for the Segv backtrace - the libc unwinder
+// has no problems with unwinding through a signal handler. Force inlining here
+// to avoid the additional frame.
+GWP_ASAN_ALWAYS_INLINE size_t SegvBacktrace(uintptr_t *TraceBuffer, size_t Size,
+                                            void * /*Context*/) {
+  return Backtrace(TraceBuffer, Size);
+}
+
 static void PrintBacktrace(uintptr_t *Trace, size_t TraceLength,
                            gwp_asan::crash_handler::Printf_t Printf) {
   if (TraceLength == 0) {
@@ -53,4 +61,8 @@ crash_handler::PrintBacktrace_t getPrintBacktraceFunction() {
   return PrintBacktrace;
 }
 } // namespace options
+
+namespace crash_handler {
+SegvBacktrace_t getSegvBacktraceFunction() { return SegvBacktrace; }
+} // namespace crash_handler
 } // namespace gwp_asan
diff --git a/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp b/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp
index 3ac4b52bfc271..a8083e4e64cb3 100644
--- a/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp
+++ b/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp
@@ -22,28 +22,45 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
                                                  void *context,
                                                  bool request_fast,
                                                  u32 max_depth) {
-  if (!StackTrace::WillUseFastUnwind(request_fast)) {
-    return Unwind(max_depth, pc, bp, context, 0, 0, request_fast);
-  }
-  Unwind(max_depth, pc, 0, context, 0, 0, false);
+  if (!StackTrace::WillUseFastUnwind(request_fast))
+    return Unwind(max_depth, pc, 0, context, 0, 0, false);
+
+  uptr top = 0;
+  uptr bottom = 0;
+  GetThreadStackTopAndBottom(/*at_initialization*/ false, &top, &bottom);
+
+  return Unwind(max_depth, pc, bp, context, top, bottom, request_fast);
 }
 
 namespace {
-size_t Backtrace(uintptr_t *TraceBuffer, size_t Size) {
+size_t BacktraceCommon(uintptr_t *TraceBuffer, size_t Size, void *Context) {
+  // Use the slow sanitizer unwinder in the segv handler. Fast frame pointer
+  // unwinders can end up dropping frames because the kernel sigreturn() frame's
+  // return address is the return address at time of fault. This has the result
+  // of never actually capturing the PC where the signal was raised.
+  bool UseFastUnwind = (Context == nullptr);
+
   __sanitizer::BufferedStackTrace Trace;
   Trace.Reset();
   if (Size > __sanitizer::kStackTraceMax)
     Size = __sanitizer::kStackTraceMax;
 
   Trace.Unwind((__sanitizer::uptr)__builtin_return_address(0),
-               (__sanitizer::uptr)__builtin_frame_address(0),
-               /* ucontext */ nullptr,
-               /* fast unwind */ true, Size - 1);
+               (__sanitizer::uptr)__builtin_frame_address(0), Context,
+               UseFastUnwind, Size - 1);
 
   memcpy(TraceBuffer, Trace.trace, Trace.size * sizeof(uintptr_t));
   return Trace.size;
 }
 
+size_t Backtrace(uintptr_t *TraceBuffer, size_t Size) {
+  return BacktraceCommon(TraceBuffer, Size, nullptr);
+}
+
+size_t SegvBacktrace(uintptr_t *TraceBuffer, size_t Size, void *Context) {
+  return BacktraceCommon(TraceBuffer, Size, Context);
+}
+
 static void PrintBacktrace(uintptr_t *Trace, size_t TraceLength,
                            gwp_asan::crash_handler::Printf_t Printf) {
   __sanitizer::StackTrace StackTrace;
@@ -77,4 +94,8 @@ crash_handler::PrintBacktrace_t getPrintBacktraceFunction() {
   return PrintBacktrace;
 }
 } // namespace options
+
+namespace crash_handler {
+SegvBacktrace_t getSegvBacktraceFunction() { return SegvBacktrace; }
+} // namespace crash_handler
 } // namespace gwp_asan
diff --git a/compiler-rt/lib/gwp_asan/optional/segv_handler.h b/compiler-rt/lib/gwp_asan/optional/segv_handler.h
index 10af15055e2a8..0fed4f2e012e9 100644
--- a/compiler-rt/lib/gwp_asan/optional/segv_handler.h
+++ b/compiler-rt/lib/gwp_asan/optional/segv_handler.h
@@ -59,6 +59,15 @@ typedef void (*PrintBacktrace_t)(uintptr_t *TraceBuffer, size_t TraceLength,
 // without any symbolization.
 PrintBacktrace_t getBasicPrintBacktraceFunction();
 
+// Returns a function pointer to a backtrace function that's suitable for
+// unwinding through a signal handler. This is important primarily for frame-
+// pointer based unwinders, DWARF or other unwinders can simply provide the
+// normal backtrace function as the implementation here. On POSIX, SignalContext
+// should be the `ucontext_t` from the signal handler.
+typedef size_t (*SegvBacktrace_t)(uintptr_t *TraceBuffer, size_t Size,
+                                  void *SignalContext);
+SegvBacktrace_t getSegvBacktraceFunction();
+
 // Install the SIGSEGV crash handler for printing use-after-free and heap-
 // buffer-{under|over}flow exceptions if the user asked for it. This is platform
 // specific as even though POSIX and Windows both support registering handlers
@@ -67,14 +76,14 @@ PrintBacktrace_t getBasicPrintBacktraceFunction();
 // before this function.
 void installSignalHandlers(gwp_asan::GuardedPoolAllocator *GPA, Printf_t Printf,
                            PrintBacktrace_t PrintBacktrace,
-                           options::Backtrace_t Backtrace);
+                           SegvBacktrace_t SegvBacktrace);
 
 void uninstallSignalHandlers();
 
 void dumpReport(uintptr_t ErrorPtr, const gwp_asan::AllocatorState *State,
                 const gwp_asan::AllocationMetadata *Metadata,
-                options::Backtrace_t Backtrace, Printf_t Printf,
-                PrintBacktrace_t PrintBacktrace);
+                SegvBacktrace_t SegvBacktrace, Printf_t Printf,
+                PrintBacktrace_t PrintBacktrace, void *Context);
 } // namespace crash_handler
 } // namespace gwp_asan
 
diff --git a/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp b/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp
index 22589b893604f..1bd7a606c2136 100644
--- a/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp
+++ b/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp
@@ -23,14 +23,14 @@ using gwp_asan::Error;
 using gwp_asan::GuardedPoolAllocator;
 using gwp_asan::crash_handler::PrintBacktrace_t;
 using gwp_asan::crash_handler::Printf_t;
-using gwp_asan::options::Backtrace_t;
+using gwp_asan::crash_handler::SegvBacktrace_t;
 
 struct sigaction PreviousHandler;
 bool SignalHandlerInstalled;
 gwp_asan::GuardedPoolAllocator *GPAForSignalHandler;
 Printf_t PrintfForSignalHandler;
 PrintBacktrace_t PrintBacktraceForSignalHandler;
-Backtrace_t BacktraceForSignalHandler;
+SegvBacktrace_t BacktraceForSignalHandler;
 
 static void sigSegvHandler(int sig, siginfo_t *info, void *ucontext) {
   if (GPAForSignalHandler) {
@@ -40,7 +40,7 @@ static void sigSegvHandler(int sig, siginfo_t *info, void *ucontext) {
         reinterpret_cast(info->si_addr),
         GPAForSignalHandler->getAllocatorState(),
         GPAForSignalHandler->getMetadataRegion(), BacktraceForSignalHandler,
-        PrintfForSignalHandler, PrintBacktraceForSignalHandler);
+        PrintfForSignalHandler, PrintBacktraceForSignalHandler, ucontext);
   }
 
   // Process any previous handlers.
@@ -138,11 +138,11 @@ PrintBacktrace_t getBasicPrintBacktraceFunction() {
 
 void installSignalHandlers(gwp_asan::GuardedPoolAllocator *GPA, Printf_t Printf,
                            PrintBacktrace_t PrintBacktrace,
-                           options::Backtrace_t Backtrace) {
+                           SegvBacktrace_t SegvBacktrace) {
   GPAForSignalHandler = GPA;
   PrintfForSignalHandler = Printf;
   PrintBacktraceForSignalHandler = PrintBacktrace;
-  BacktraceForSignalHandler = Backtrace;
+  BacktraceForSignalHandler = SegvBacktrace;
 
   struct sigaction Action;
   Action.sa_sigaction = sigSegvHandler;
@@ -160,8 +160,8 @@ void uninstallSignalHandlers() {
 
 void dumpReport(uintptr_t ErrorPtr, const gwp_asan::AllocatorState *State,
                 const gwp_asan::AllocationMetadata *Metadata,
-                options::Backtrace_t Backtrace, Printf_t Printf,
-                PrintBacktrace_t PrintBacktrace) {
+                SegvBacktrace_t SegvBacktrace, Printf_t Printf,
+                PrintBacktrace_t PrintBacktrace, void *Context) {
   assert(State && "dumpReport missing Allocator State.");
   assert(Metadata && "dumpReport missing Metadata.");
   assert(Printf && "dumpReport missing Printf.");
@@ -194,7 +194,8 @@ void dumpReport(uintptr_t ErrorPtr, const gwp_asan::AllocatorState *State,
   // Print the fault backtrace.
   static constexpr unsigned kMaximumStackFramesForCrashTrace = 512;
   uintptr_t Trace[kMaximumStackFramesForCrashTrace];
-  size_t TraceLength = Backtrace(Trace, kMaximumStackFramesForCrashTrace);
+  size_t TraceLength =
+      SegvBacktrace(Trace, kMaximumStackFramesForCrashTrace, Context);
 
   PrintBacktrace(Trace, TraceLength, Printf);
 
diff --git a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
index feac23df9fe5d..f88d90c19d5b9 100644
--- a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
@@ -5,7 +5,8 @@ set(GWP_ASAN_UNITTEST_CFLAGS
   ${COMPILER_RT_GTEST_CFLAGS}
   -I${COMPILER_RT_SOURCE_DIR}/lib/
   -O2
-  -g)
+  -g
+  -fno-omit-frame-pointer)
 
 file(GLOB GWP_ASAN_HEADERS ../*.h)
 set(GWP_ASAN_UNITTESTS
diff --git a/compiler-rt/lib/gwp_asan/tests/harness.h b/compiler-rt/lib/gwp_asan/tests/harness.h
index e47254e13c467..d303b2cfa6470 100644
--- a/compiler-rt/lib/gwp_asan/tests/harness.h
+++ b/compiler-rt/lib/gwp_asan/tests/harness.h
@@ -86,7 +86,8 @@ class BacktraceGuardedPoolAllocator : public ::testing::Test {
 
     gwp_asan::crash_handler::installSignalHandlers(
         &GPA, gwp_asan::test::getPrintfFunction(),
-        gwp_asan::options::getPrintBacktraceFunction(), Opts.Backtrace);
+        gwp_asan::options::getPrintBacktraceFunction(),
+        gwp_asan::crash_handler::getSegvBacktraceFunction());
   }
 
   void TearDown() override {
diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp
index d67a88d455eff..11b4d3891bc2c 100644
--- a/compiler-rt/lib/hwasan/hwasan.cpp
+++ b/compiler-rt/lib/hwasan/hwasan.cpp
@@ -286,8 +286,6 @@ void __hwasan_init() {
   // initialized when InitInstrumentation() was called.
   GetCurrentThread()->InitRandomState();
 
-  MadviseShadow();
-
   SetPrintfAndReportCallback(AppendToErrorMessageBuffer);
   // This may call libc -> needs initialized shadow.
   AndroidLogInit();
diff --git a/compiler-rt/lib/hwasan/hwasan.h b/compiler-rt/lib/hwasan/hwasan.h
index 8cbd9e74e3350..b8b7a1865e860 100644
--- a/compiler-rt/lib/hwasan/hwasan.h
+++ b/compiler-rt/lib/hwasan/hwasan.h
@@ -75,7 +75,6 @@ extern int hwasan_report_count;
 bool InitShadow();
 void InitPrctl();
 void InitThreads();
-void MadviseShadow();
 void InitializeInterceptors();
 
 void HwasanAllocatorInit();
diff --git a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp
index a04751f44a311..12730b29bae36 100644
--- a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp
@@ -24,47 +24,6 @@
 // The code in this file needs to run in an unrelocated binary. It should not
 // access any external symbol, including its own non-hidden globals.
 
-namespace __hwasan {
-
-static void UnmapFromTo(uptr from, uptr to) {
-  if (to == from)
-    return;
-  CHECK(to >= from);
-  uptr res = internal_munmap(reinterpret_cast(from), to - from);
-  if (UNLIKELY(internal_iserror(res))) {
-    Report("ERROR: %s failed to unmap 0x%zx (%zd) bytes at address %p\n",
-           SanitizerToolName, to - from, to - from, from);
-    CHECK("unable to unmap" && 0);
-  }
-}
-
-// Returns an address aligned to kShadowBaseAlignment, such that
-// 2**kShadowBaseAlingment on the left and shadow_size_bytes bytes on the right
-// of it are mapped no access.
-static uptr MapDynamicShadow(uptr shadow_size_bytes) {
-  const uptr granularity = GetMmapGranularity();
-  const uptr min_alignment = granularity << kShadowScale;
-  const uptr alignment = 1ULL << kShadowBaseAlignment;
-  CHECK_GE(alignment, min_alignment);
-
-  const uptr left_padding = 1ULL << kShadowBaseAlignment;
-  const uptr shadow_size =
-      RoundUpTo(shadow_size_bytes, granularity);
-  const uptr map_size = shadow_size + left_padding + alignment;
-
-  const uptr map_start = (uptr)MmapNoAccess(map_size);
-  CHECK_NE(map_start, ~(uptr)0);
-
-  const uptr shadow_start = RoundUpTo(map_start + left_padding, alignment);
-
-  UnmapFromTo(map_start, shadow_start - left_padding);
-  UnmapFromTo(shadow_start + shadow_size, map_start + map_size);
-
-  return shadow_start;
-}
-
-}  // namespace __hwasan
-
 #if SANITIZER_ANDROID
 extern "C" {
 
@@ -82,7 +41,8 @@ static uptr PremapShadowSize() {
 }
 
 static uptr PremapShadow() {
-  return MapDynamicShadow(PremapShadowSize());
+  return MapDynamicShadow(PremapShadowSize(), kShadowScale,
+                          kShadowBaseAlignment, kHighMemEnd);
 }
 
 static bool IsPremapShadowAvailable() {
@@ -146,7 +106,8 @@ void InitShadowGOT() {
 uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
   if (IsPremapShadowAvailable())
     return FindPremappedShadowStart(shadow_size_bytes);
-  return MapDynamicShadow(shadow_size_bytes);
+  return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment,
+                          kHighMemEnd);
 }
 
 }  // namespace __hwasan
@@ -156,7 +117,8 @@ namespace __hwasan {
 void InitShadowGOT() {}
 
 uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
-  return MapDynamicShadow(shadow_size_bytes);
+  return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment,
+                          kHighMemEnd);
 }
 
 }  // namespace __hwasan
diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp b/compiler-rt/lib/hwasan/hwasan_linux.cpp
index f1e830ddf901f..e99926d355cfa 100644
--- a/compiler-rt/lib/hwasan/hwasan_linux.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp
@@ -57,56 +57,24 @@ THREADLOCAL uptr __hwasan_tls;
 
 namespace __hwasan {
 
-static void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
-  CHECK_EQ((beg % GetMmapGranularity()), 0);
-  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
-  uptr size = end - beg + 1;
-  DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
-  if (!MmapFixedNoReserve(beg, size, name)) {
-    Report(
-        "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
-        "Perhaps you're using ulimit -v\n",
-        size);
-    Abort();
-  }
-}
+// With the zero shadow base we can not actually map pages starting from 0.
+// This constant is somewhat arbitrary.
+constexpr uptr kZeroBaseShadowStart = 0;
+constexpr uptr kZeroBaseMaxShadowStart = 1 << 18;
 
 static void ProtectGap(uptr addr, uptr size) {
-  if (!size)
-    return;
-  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
-  if (addr == (uptr)res)
-    return;
-  // A few pages at the start of the address space can not be protected.
-  // But we really want to protect as much as possible, to prevent this memory
-  // being returned as a result of a non-FIXED mmap().
-  if (addr == 0) {
-    uptr step = GetMmapGranularity();
-    while (size > step) {
-      addr += step;
-      size -= step;
-      void *res = MmapFixedNoAccess(addr, size, "shadow gap");
-      if (addr == (uptr)res)
-        return;
-    }
-  }
-
-  Report(
-      "ERROR: Failed to protect shadow gap [%p, %p]. "
-      "HWASan cannot proceed correctly. ABORTING.\n", (void *)addr,
-      (void *)(addr + size));
-  DumpProcessMap();
-  Die();
+  __sanitizer::ProtectGap(addr, size, kZeroBaseShadowStart,
+                          kZeroBaseMaxShadowStart);
 }
 
-static uptr kLowMemStart;
-static uptr kLowMemEnd;
-static uptr kLowShadowEnd;
-static uptr kLowShadowStart;
-static uptr kHighShadowStart;
-static uptr kHighShadowEnd;
-static uptr kHighMemStart;
-static uptr kHighMemEnd;
+uptr kLowMemStart;
+uptr kLowMemEnd;
+uptr kLowShadowEnd;
+uptr kLowShadowStart;
+uptr kHighShadowStart;
+uptr kHighShadowEnd;
+uptr kHighMemStart;
+uptr kHighMemEnd;
 
 static void PrintRange(uptr start, uptr end, const char *name) {
   Printf("|| [%p, %p] || %.*s ||\n", (void *)start, (void *)end, 10, name);
@@ -242,24 +210,12 @@ void InitThreads() {
   uptr thread_space_end =
       __hwasan_shadow_memory_dynamic_address - guard_page_size;
   ReserveShadowMemoryRange(thread_space_start, thread_space_end - 1,
-                           "hwasan threads");
+                           "hwasan threads", /*madvise_shadow*/ false);
   ProtectGap(thread_space_end,
              __hwasan_shadow_memory_dynamic_address - thread_space_end);
   InitThreadList(thread_space_start, thread_space_end - thread_space_start);
 }
 
-static void MadviseShadowRegion(uptr beg, uptr end) {
-  uptr size = end - beg + 1;
-  SetShadowRegionHugePageMode(beg, size);
-  if (common_flags()->use_madv_dontdump)
-    DontDumpShadowMemory(beg, size);
-}
-
-void MadviseShadow() {
-  MadviseShadowRegion(kLowShadowStart, kLowShadowEnd);
-  MadviseShadowRegion(kHighShadowStart, kHighShadowEnd);
-}
-
 bool MemIsApp(uptr p) {
   CHECK(GetTagFromPointer(p) == 0);
   return p >= kHighMemStart || (p >= kLowMemStart && p <= kLowMemEnd);
diff --git a/compiler-rt/lib/hwasan/hwasan_mapping.h b/compiler-rt/lib/hwasan/hwasan_mapping.h
index a86ad7ca80360..c149687bdfa60 100644
--- a/compiler-rt/lib/hwasan/hwasan_mapping.h
+++ b/compiler-rt/lib/hwasan/hwasan_mapping.h
@@ -39,6 +39,15 @@ constexpr uptr kShadowAlignment = 1ULL << kShadowScale;
 
 namespace __hwasan {
 
+extern uptr kLowMemStart;
+extern uptr kLowMemEnd;
+extern uptr kLowShadowEnd;
+extern uptr kLowShadowStart;
+extern uptr kHighShadowStart;
+extern uptr kHighShadowEnd;
+extern uptr kHighMemStart;
+extern uptr kHighMemEnd;
+
 inline uptr MemToShadow(uptr untagged_addr) {
   return (untagged_addr >> kShadowScale) +
          __hwasan_shadow_memory_dynamic_address;
diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt
index ece674b2daa14..63532b72ff82e 100644
--- a/compiler-rt/lib/profile/CMakeLists.txt
+++ b/compiler-rt/lib/profile/CMakeLists.txt
@@ -59,6 +59,7 @@ set(PROFILE_SOURCES
   InstrProfilingMerge.c
   InstrProfilingMergeFile.c
   InstrProfilingNameVar.c
+  InstrProfilingVersionVar.c
   InstrProfilingWriter.c
   InstrProfilingPlatformDarwin.c
   InstrProfilingPlatformFuchsia.c
diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c
index 57d8dec423cc0..fa4b951383324 100644
--- a/compiler-rt/lib/profile/GCDAProfiling.c
+++ b/compiler-rt/lib/profile/GCDAProfiling.c
@@ -639,25 +639,6 @@ static void llvm_writeout_and_clear(void) {
   fn_list_remove(&writeout_fn_list);
 }
 
-COMPILER_RT_VISIBILITY
-void llvm_register_flush_function(fn_ptr fn) {
-  fn_list_insert(&flush_fn_list, fn);
-}
-
-void __gcov_flush() {
-  struct fn_node* curr = flush_fn_list.head;
-
-  while (curr) {
-    curr->fn();
-    curr = curr->next;
-  }
-}
-
-COMPILER_RT_VISIBILITY
-void llvm_delete_flush_function_list(void) {
-  fn_list_remove(&flush_fn_list);
-}
-
 COMPILER_RT_VISIBILITY
 void llvm_register_reset_function(fn_ptr fn) {
   fn_list_insert(&reset_fn_list, fn);
@@ -698,15 +679,12 @@ pid_t __gcov_fork() {
 #endif
 
 COMPILER_RT_VISIBILITY
-void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn, fn_ptr rfn) {
+void llvm_gcov_init(fn_ptr wfn, fn_ptr rfn) {
   static int atexit_ran = 0;
 
   if (wfn)
     llvm_register_writeout_function(wfn);
 
-  if (ffn)
-    llvm_register_flush_function(ffn);
-
   if (rfn)
     llvm_register_reset_function(rfn);
 
@@ -715,11 +693,20 @@ void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn, fn_ptr rfn) {
 
     /* Make sure we write out the data and delete the data structures. */
     atexit(llvm_delete_reset_function_list);
-    atexit(llvm_delete_flush_function_list);
 #ifdef _WIN32
     atexit(llvm_writeout_and_clear);
 #endif
   }
 }
 
+void __gcov_dump(void) {
+  for (struct fn_node *f = writeout_fn_list.head; f; f = f->next)
+    f->fn();
+}
+
+void __gcov_reset(void) {
+  for (struct fn_node *f = reset_fn_list.head; f; f = f->next)
+    f->fn();
+}
+
 #endif
diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c
index 31a9fe9962931..92ad25f62cd1b 100644
--- a/compiler-rt/lib/profile/InstrProfiling.c
+++ b/compiler-rt/lib/profile/InstrProfiling.c
@@ -18,8 +18,6 @@
 #include "profile/InstrProfData.inc"
 
 
-COMPILER_RT_WEAK uint64_t INSTR_PROF_RAW_VERSION_VAR = INSTR_PROF_RAW_VERSION;
-
 COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
   return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
                                             : (INSTR_PROF_RAW_MAGIC_32);
diff --git a/compiler-rt/lib/profile/InstrProfilingValue.c b/compiler-rt/lib/profile/InstrProfilingValue.c
index fd53cac3dff31..76e1d3fa11b80 100644
--- a/compiler-rt/lib/profile/InstrProfilingValue.c
+++ b/compiler-rt/lib/profile/InstrProfilingValue.c
@@ -17,13 +17,14 @@
 
 #define INSTR_PROF_VALUE_PROF_DATA
 #define INSTR_PROF_COMMON_API_IMPL
+#define INSTR_PROF_VALUE_PROF_MEMOP_API
 #include "profile/InstrProfData.inc"
 
 static int hasStaticCounters = 1;
 static int OutOfNodesWarnings = 0;
 static int hasNonDefaultValsPerSite = 0;
 #define INSTR_PROF_MAX_VP_WARNS 10
-#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
+#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
 #define INSTR_PROF_VNODE_POOL_SIZE 1024
 
 #ifndef _MSC_VER
@@ -250,6 +251,8 @@ __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
  * The range for large values is optional. The default value of INT64_MIN
  * indicates it is not specified.
  */
+/* FIXME: This is to be removed after switching to the new memop value
+ * profiling. */
 COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
     uint64_t TargetValue, void *Data, uint32_t CounterIndex,
     int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) {
@@ -263,6 +266,18 @@ COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
   __llvm_profile_instrument_target(TargetValue, Data, CounterIndex);
 }
 
+/*
+ * The target values are partitioned into multiple ranges. The range spec is
+ * defined in InstrProfData.inc.
+ */
+COMPILER_RT_VISIBILITY void
+__llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
+                                uint32_t CounterIndex) {
+  // Map the target value to the representative value of its range.
+  uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
+  __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
+}
+
 /*
  * A wrapper struct that represents value profile runtime data.
  * Like InstrProfRecord class which is used by profiling host tools,
diff --git a/compiler-rt/lib/profile/InstrProfilingVersionVar.c b/compiler-rt/lib/profile/InstrProfilingVersionVar.c
new file mode 100644
index 0000000000000..a6f222150794d
--- /dev/null
+++ b/compiler-rt/lib/profile/InstrProfilingVersionVar.c
@@ -0,0 +1,17 @@
+/*===- InstrProfilingVersionVar.c - profile version variable setup  -------===*\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+
+#include "InstrProfiling.h"
+
+/* uint64 __llvm_profile_raw_version
+ *
+ * The runtime should only provide its own definition of this symbol when the
+ * user has not specified one. Set this up by moving the runtime's copy of this
+ * symbol to an object file within the archive.
+ */
+COMPILER_RT_WEAK uint64_t INSTR_PROF_RAW_VERSION_VAR = INSTR_PROF_RAW_VERSION;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
index 07b307a602c97..bf6ca735fb0d8 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
@@ -121,6 +121,31 @@ bool MprotectReadOnly(uptr addr, uptr size);
 
 void MprotectMallocZones(void *addr, int prot);
 
+#if SANITIZER_LINUX
+// Unmap memory. Currently only used on Linux.
+void UnmapFromTo(uptr from, uptr to);
+#endif
+
+// Maps shadow_size_bytes of shadow memory and returns shadow address. It will
+// be aligned to the mmap granularity * 2^shadow_scale, or to
+// 2^min_shadow_base_alignment if that is larger. The returned address will
+// have max(2^min_shadow_base_alignment, mmap granularity) on the left, and
+// shadow_size_bytes bytes on the right, mapped no access.
+// The high_mem_end may be updated if the original shadow size doesn't fit.
+uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
+                      uptr min_shadow_base_alignment, uptr &high_mem_end);
+
+// Reserve memory range [beg, end]. If madvise_shadow is true then apply
+// madvise (e.g. hugepages, core dumping) requested by options.
+void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name,
+                              bool madvise_shadow = true);
+
+// Protect size bytes of memory starting at addr. Also try to protect
+// several pages at the start of the address space as specified by
+// zero_base_shadow_start, at most up to the size or zero_base_max_shadow_start.
+void ProtectGap(uptr addr, uptr size, uptr zero_base_shadow_start,
+                uptr zero_base_max_shadow_start);
+
 // Find an available address space.
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
                               uptr *largest_gap_found, uptr *max_occupied_addr);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index ea9c71ba88032..0fdaf00e67c17 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -2199,6 +2199,24 @@ INTERCEPTOR(int, clock_settime, u32 clk_id, const void *tp) {
 #define INIT_CLOCK_GETTIME
 #endif
 
+#if SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID
+INTERCEPTOR(int, clock_getcpuclockid, pid_t pid,
+            __sanitizer_clockid_t *clockid) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, clock_getcpuclockid, pid, clockid);
+  int res = REAL(clock_getcpuclockid)(pid, clockid);
+  if (!res && clockid) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid);
+  }
+  return res;
+}
+
+#define INIT_CLOCK_GETCPUCLOCKID                  \
+  COMMON_INTERCEPT_FUNCTION(clock_getcpuclockid);
+#else
+#define INIT_CLOCK_GETCPUCLOCKID
+#endif
+
 #if SANITIZER_INTERCEPT_GETITIMER
 INTERCEPTOR(int, getitimer, int which, void *curr_value) {
   void *ctx;
@@ -4067,6 +4085,33 @@ INTERCEPTOR(int, sigfillset, __sanitizer_sigset_t *set) {
 #define INIT_SIGSETOPS
 #endif
 
+#if SANITIZER_INTERCEPT_SIGSET_LOGICOPS
+INTERCEPTOR(int, sigandset, __sanitizer_sigset_t *dst, __sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sigandset, dst, src1, src2);
+  if (src1) COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1));
+  if (src2) COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2));
+  int res = REAL(sigandset)(dst, src1, src2);
+  if (!res && dst) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
+  return res;
+}
+
+INTERCEPTOR(int, sigorset, __sanitizer_sigset_t *dst, __sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sigorset, dst, src1, src2);
+  if (src1) COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1));
+  if (src2) COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2));
+  int res = REAL(sigorset)(dst, src1, src2);
+  if (!res && dst) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
+  return res;
+}
+#define INIT_SIGSET_LOGICOPS                    \
+  COMMON_INTERCEPT_FUNCTION(sigandset);   \
+  COMMON_INTERCEPT_FUNCTION(sigorset);
+#else
+#define INIT_SIGSET_LOGICOPS
+#endif
+
 #if SANITIZER_INTERCEPT_SIGPENDING
 INTERCEPTOR(int, sigpending, __sanitizer_sigset_t *set) {
   void *ctx;
@@ -9914,6 +9959,7 @@ static void InitializeCommonInterceptors() {
   INIT_FGETGRENT_R;
   INIT_SETPWENT;
   INIT_CLOCK_GETTIME;
+  INIT_CLOCK_GETCPUCLOCKID;
   INIT_GETITIMER;
   INIT_TIME;
   INIT_GLOB;
@@ -9977,6 +10023,7 @@ static void InitializeCommonInterceptors() {
   INIT_SIGWAITINFO;
   INIT_SIGTIMEDWAIT;
   INIT_SIGSETOPS;
+  INIT_SIGSET_LOGICOPS;
   INIT_SIGPENDING;
   INIT_SIGPROCMASK;
   INIT_PTHREAD_SIGMASK;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
index 0c918ebb4a9d6..047c5a17ea6e7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -139,6 +139,59 @@ uptr ReservedAddressRange::InitAligned(uptr size, uptr align,
   return start;
 }
 
+#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
+
+// Reserve memory range [beg, end].
+// We need to use inclusive range because end+1 may not be representable.
+void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name,
+                              bool madvise_shadow) {
+  CHECK_EQ((beg % GetMmapGranularity()), 0);
+  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
+  uptr size = end - beg + 1;
+  DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
+  if (madvise_shadow ? !MmapFixedSuperNoReserve(beg, size, name)
+                     : !MmapFixedNoReserve(beg, size, name)) {
+    Report(
+        "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
+        "Perhaps you're using ulimit -v\n",
+        size);
+    Abort();
+  }
+  if (madvise_shadow && common_flags()->use_madv_dontdump)
+    DontDumpShadowMemory(beg, size);
+}
+
+void ProtectGap(uptr addr, uptr size, uptr zero_base_shadow_start,
+                uptr zero_base_max_shadow_start) {
+  if (!size)
+    return;
+  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
+  if (addr == (uptr)res)
+    return;
+  // A few pages at the start of the address space can not be protected.
+  // But we really want to protect as much as possible, to prevent this memory
+  // being returned as a result of a non-FIXED mmap().
+  if (addr == zero_base_shadow_start) {
+    uptr step = GetMmapGranularity();
+    while (size > step && addr < zero_base_max_shadow_start) {
+      addr += step;
+      size -= step;
+      void *res = MmapFixedNoAccess(addr, size, "shadow gap");
+      if (addr == (uptr)res)
+        return;
+    }
+  }
+
+  Report(
+      "ERROR: Failed to protect the shadow gap. "
+      "%s cannot proceed correctly. ABORTING.\n",
+      SanitizerToolName);
+  DumpProcessMap();
+  Die();
+}
+
+#endif  // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
+
 }  // namespace __sanitizer
 
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_sandbox_on_notify,
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_flags.cpp
index 684ee1e0b9995..d3290493fd6da 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.cpp
@@ -91,7 +91,7 @@ class FlagHandlerInclude : public FlagHandlerBase {
     }
     return parser_->ParseFile(value, ignore_missing_);
   }
-  bool Format(char *buffer, uptr size) {
+  bool Format(char *buffer, uptr size) override {
     // Note `original_path_` isn't actually what's parsed due to `%`
     // substitutions. Printing the substituted path would require holding onto
     // mmap'ed memory.
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 91caa6a35693b..470f4b70f0592 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2042,13 +2042,13 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
 # ifndef REG_EBP
 #  define REG_EBP  6 // REG_FP
 # endif
-# ifndef REG_ESP
-#  define REG_ESP 17 // REG_SP
+# ifndef REG_UESP
+#  define REG_UESP 17 // REG_SP
 # endif
 # endif
   *pc = ucontext->uc_mcontext.gregs[REG_EIP];
   *bp = ucontext->uc_mcontext.gregs[REG_EBP];
-  *sp = ucontext->uc_mcontext.gregs[REG_ESP];
+  *sp = ucontext->uc_mcontext.gregs[REG_UESP];
 # endif
 #elif defined(__powerpc__) || defined(__powerpc64__)
   ucontext_t *ucontext = (ucontext_t*)context;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
index 4d17c9686e4ed..86918a51a2460 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -841,6 +841,41 @@ void ReExec() {
 }
 #endif  // !SANITIZER_OPENBSD
 
+void UnmapFromTo(uptr from, uptr to) {
+  if (to == from)
+    return;
+  CHECK(to >= from);
+  uptr res = internal_munmap(reinterpret_cast(from), to - from);
+  if (UNLIKELY(internal_iserror(res))) {
+    Report("ERROR: %s failed to unmap 0x%zx (%zd) bytes at address %p\n",
+           SanitizerToolName, to - from, to - from, (void *)from);
+    CHECK("unable to unmap" && 0);
+  }
+}
+
+uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
+                      uptr min_shadow_base_alignment,
+                      UNUSED uptr &high_mem_end) {
+  const uptr granularity = GetMmapGranularity();
+  const uptr alignment =
+      Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment);
+  const uptr left_padding =
+      Max(granularity, 1ULL << min_shadow_base_alignment);
+
+  const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity);
+  const uptr map_size = shadow_size + left_padding + alignment;
+
+  const uptr map_start = (uptr)MmapNoAccess(map_size);
+  CHECK_NE(map_start, ~(uptr)0);
+
+  const uptr shadow_start = RoundUpTo(map_start + left_padding, alignment);
+
+  UnmapFromTo(map_start, shadow_start - left_padding);
+  UnmapFromTo(shadow_start + shadow_size, map_start + map_size);
+
+  return shadow_start;
+}
+
 } // namespace __sanitizer
 
 #endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
index 7a3dfbcc27607..db8a09e6f0de3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
@@ -1070,6 +1070,53 @@ uptr GetMaxVirtualAddress() {
   return GetMaxUserVirtualAddress();
 }
 
+uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
+                      uptr min_shadow_base_alignment, uptr &high_mem_end) {
+  const uptr granularity = GetMmapGranularity();
+  const uptr alignment =
+      Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment);
+  const uptr left_padding =
+      Max(granularity, 1ULL << min_shadow_base_alignment);
+
+  uptr space_size = shadow_size_bytes + left_padding;
+
+  uptr largest_gap_found = 0;
+  uptr max_occupied_addr = 0;
+  VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
+  uptr shadow_start =
+      FindAvailableMemoryRange(space_size, alignment, granularity,
+                               &largest_gap_found, &max_occupied_addr);
+  // If the shadow doesn't fit, restrict the address space to make it fit.
+  if (shadow_start == 0) {
+    VReport(
+        2,
+        "Shadow doesn't fit, largest_gap_found = %p, max_occupied_addr = %p\n",
+        largest_gap_found, max_occupied_addr);
+    uptr new_max_vm = RoundDownTo(largest_gap_found << shadow_scale, alignment);
+    if (new_max_vm < max_occupied_addr) {
+      Report("Unable to find a memory range for dynamic shadow.\n");
+      Report(
+          "space_size = %p, largest_gap_found = %p, max_occupied_addr = %p, "
+          "new_max_vm = %p\n",
+          space_size, largest_gap_found, max_occupied_addr, new_max_vm);
+      CHECK(0 && "cannot place shadow");
+    }
+    RestrictMemoryToMaxAddress(new_max_vm);
+    high_mem_end = new_max_vm - 1;
+    space_size = (high_mem_end >> shadow_scale) + left_padding;
+    VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
+    shadow_start = FindAvailableMemoryRange(space_size, alignment, granularity,
+                                            nullptr, nullptr);
+    if (shadow_start == 0) {
+      Report("Unable to find a memory range after restricting VM.\n");
+      CHECK(0 && "cannot place shadow after restricting vm");
+    }
+  }
+  CHECK_NE((uptr)0, shadow_start);
+  CHECK(IsAligned(shadow_start, alignment));
+  return shadow_start;
+}
+
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
                               uptr *largest_gap_found,
                               uptr *max_occupied_addr) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 2d48e9d0ae1ad..04b61d6daae78 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -240,6 +240,7 @@
   (SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_CLOCK_GETTIME \
   (SI_FREEBSD || SI_NETBSD || SI_OPENBSD || SI_LINUX || SI_SOLARIS)
+#define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID SI_LINUX
 #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX
 #define SANITIZER_INTERCEPT_TIME SI_POSIX
 #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID || SI_SOLARIS
@@ -332,6 +333,7 @@
 #define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID || SI_SOLARIS
 #define SANITIZER_INTERCEPT_SIGSETOPS \
   (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
+#define SANITIZER_INTERCEPT_SIGSET_LOGICOPS SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SIGPENDING SI_POSIX
 #define SANITIZER_INTERCEPT_SIGPROCMASK SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_SIGMASK SI_POSIX
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp
index 6c577426ad566..7f9529aa35562 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp
@@ -31,15 +31,15 @@ class SuspendedThreadsListMac : public SuspendedThreadsList {
  public:
   SuspendedThreadsListMac() : threads_(1024) {}
 
-  tid_t GetThreadID(uptr index) const;
+  tid_t GetThreadID(uptr index) const override;
   thread_t GetThread(uptr index) const;
-  uptr ThreadCount() const;
+  uptr ThreadCount() const override;
   bool ContainsThread(thread_t thread) const;
   void Append(thread_t thread);
 
   PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer,
-                                          uptr *sp) const;
-  uptr RegisterCount() const;
+                                          uptr *sp) const override;
+  uptr RegisterCount() const override;
 
  private:
   InternalMmapVector threads_;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
index fca15beb61612..53a537d398475 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
@@ -348,6 +348,22 @@ bool DontDumpShadowMemory(uptr addr, uptr length) {
   return true;
 }
 
+uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
+                      uptr min_shadow_base_alignment,
+                      UNUSED uptr &high_mem_end) {
+  const uptr granularity = GetMmapGranularity();
+  const uptr alignment =
+      Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment);
+  const uptr left_padding =
+      Max(granularity, 1ULL << min_shadow_base_alignment);
+  uptr space_size = shadow_size_bytes + left_padding;
+  uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
+                                               granularity, nullptr, nullptr);
+  CHECK_NE((uptr)0, shadow_start);
+  CHECK(IsAligned(shadow_start, alignment));
+  return shadow_start;
+}
+
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
                               uptr *largest_gap_found,
                               uptr *max_occupied_addr) {
diff --git a/compiler-rt/lib/sanitizer_common/scripts/litlint_test.py b/compiler-rt/lib/sanitizer_common/scripts/litlint_test.py
index 3ce482d704442..30c9f16efed54 100755
--- a/compiler-rt/lib/sanitizer_common/scripts/litlint_test.py
+++ b/compiler-rt/lib/sanitizer_common/scripts/litlint_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Tests for litlint.py
 #
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
index a0aa79ee54bb5..5b6433011a098 100755
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -98,7 +98,7 @@ make -j${J} libz.a
 if [[ ! -d ${LIBCXX_BUILD} ]]; then
   mkdir -p ${LIBCXX_BUILD}
   cd ${LIBCXX_BUILD}
-  LIBCXX_FLAGS="${FLAGS} -Wno-macro-redefined -I${LIBCXX_SRC}/include"
+  LIBCXX_FLAGS="${FLAGS} -Wno-macro-redefined"
   PROJECTS=
   if [[ ! -d $LLVM_SRC/projects/libcxxabi ]] ; then
     PROJECTS="-DLLVM_ENABLE_PROJECTS='libcxx;libcxxabi'"
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
index c3f41f19c3656..29b2960e11fe4 100644
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -31,6 +31,8 @@ __interceptor_pthread_setspecific w
 __interceptor_read w
 __interceptor_realpath w
 __isinf U
+__isoc99_sscanf U
+__isoc99_vsscanf U
 __moddi3 U
 __sanitizer_symbolize_code T
 __sanitizer_symbolize_data T
diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp
index d9023c2f7ab64..343f85a4ef88b 100644
--- a/compiler-rt/lib/scudo/scudo_allocator.cpp
+++ b/compiler-rt/lib/scudo/scudo_allocator.cpp
@@ -29,6 +29,7 @@
 # include "gwp_asan/guarded_pool_allocator.h"
 # include "gwp_asan/optional/backtrace.h"
 # include "gwp_asan/optional/options_parser.h"
+#include "gwp_asan/optional/segv_handler.h"
 #endif // GWP_ASAN_HOOKS
 
 #include 
@@ -679,7 +680,8 @@ void initScudo() {
   if (Opts.InstallSignalHandlers)
     gwp_asan::crash_handler::installSignalHandlers(
         &GuardedAlloc, __sanitizer::Printf,
-        gwp_asan::options::getPrintBacktraceFunction(), Opts.Backtrace);
+        gwp_asan::options::getPrintBacktraceFunction(),
+        gwp_asan::crash_handler::getSegvBacktraceFunction());
 #endif // GWP_ASAN_HOOKS
 }
 
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index 3bb41eca88f72..ae085befc4f15 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -192,7 +192,7 @@ class Allocator {
     if (Opt.InstallSignalHandlers)
       gwp_asan::crash_handler::installSignalHandlers(
           &GuardedAlloc, Printf, gwp_asan::options::getPrintBacktraceFunction(),
-          Opt.Backtrace);
+          gwp_asan::crash_handler::getSegvBacktraceFunction());
 #endif // GWP_ASAN_HOOKS
   }
 
diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
index a6425fc6d1ea1..089aeb939627d 100644
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -159,6 +159,7 @@ template  struct SizeClassAllocatorLocalCache {
     DCHECK_GT(B->getCount(), 0);
     C->Count = B->getCount();
     B->copyToArray(C->Chunks);
+    B->clear();
     destroyBatch(ClassId, B);
     return true;
   }
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 29a2680981852..2ee0f6c600ab2 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -444,6 +444,18 @@ class SizeClassAllocator32 {
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    // Releasing smaller blocks is expensive, so we want to make sure that a
+    // significant amount of bytes are free, and that there has been a good
+    // amount of batches pushed to the freelist before attempting to release.
+    if (BlockSize < PageSize / 16U) {
+      if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
+        return 0;
+      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
+      if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
+          (100U - 1U - BlockSize / 16U))
+        return 0;
+    }
+
     if (!Force) {
       const s32 IntervalMs = getReleaseToOsIntervalMs();
       if (IntervalMs < 0)
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index d4767882ba2c7..01e674bf3fba5 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -213,9 +213,7 @@ class SizeClassAllocator64 {
     return reinterpret_cast(RegionInfoArray);
   }
 
-  static uptr getRegionInfoArraySize() {
-    return sizeof(RegionInfoArray);
-  }
+  static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }
 
   static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
     const RegionInfo *RegionInfoArray =
@@ -458,6 +456,18 @@ class SizeClassAllocator64 {
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    // Releasing smaller blocks is expensive, so we want to make sure that a
+    // significant amount of bytes are free, and that there has been a good
+    // amount of batches pushed to the freelist before attempting to release.
+    if (BlockSize < PageSize / 16U) {
+      if (!Force && BytesPushed < Region->AllocatedUser / 16U)
+        return 0;
+      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
+      if ((BytesInFreeList * 100U) / Region->AllocatedUser <
+          (100U - 1U - BlockSize / 16U))
+        return 0;
+    }
+
     if (!Force) {
       const s32 IntervalMs = getReleaseToOsIntervalMs();
       if (IntervalMs < 0)
diff --git a/compiler-rt/lib/scudo/standalone/release.cpp b/compiler-rt/lib/scudo/standalone/release.cpp
index e144b354b258a..5d7c6c5fc110b 100644
--- a/compiler-rt/lib/scudo/standalone/release.cpp
+++ b/compiler-rt/lib/scudo/standalone/release.cpp
@@ -11,6 +11,6 @@
 namespace scudo {
 
 HybridMutex PackedCounterArray::Mutex = {};
-uptr PackedCounterArray::StaticBuffer[1024];
+uptr PackedCounterArray::StaticBuffer[PackedCounterArray::StaticBufferCount];
 
 } // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index 323bf9db6dcac..fd55ea24132e6 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -69,7 +69,8 @@ class PackedCounterArray {
     BufferSize = (roundUpTo(N, static_cast(1U) << PackingRatioLog) >>
                   PackingRatioLog) *
                  sizeof(*Buffer);
-    if (BufferSize <= StaticBufferSize && Mutex.tryLock()) {
+    if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
+        Mutex.tryLock()) {
       Buffer = &StaticBuffer[0];
       memset(Buffer, 0, BufferSize);
     } else {
@@ -114,6 +115,8 @@ class PackedCounterArray {
 
   uptr getBufferSize() const { return BufferSize; }
 
+  static const uptr StaticBufferCount = 1024U;
+
 private:
   const uptr N;
   uptr CounterSizeBitsLog;
@@ -125,8 +128,7 @@ class PackedCounterArray {
   uptr *Buffer;
 
   static HybridMutex Mutex;
-  static const uptr StaticBufferSize = 1024U;
-  static uptr StaticBuffer[StaticBufferSize];
+  static uptr StaticBuffer[StaticBufferCount];
 };
 
 template  class FreePagesRangeTracker {
diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh
index 2238caf53b34a..e9b4b4c9f9c07 100755
--- a/compiler-rt/lib/tsan/go/buildgo.sh
+++ b/compiler-rt/lib/tsan/go/buildgo.sh
@@ -138,6 +138,7 @@ elif [ "`uname -a | grep Darwin`" != "" ]; then
 		$SRCS
 		../rtl/tsan_platform_mac.cpp
 		../../sanitizer_common/sanitizer_mac.cpp
+		../../sanitizer_common/sanitizer_mac_libcdep.cpp
 		../../sanitizer_common/sanitizer_posix.cpp
 		../../sanitizer_common/sanitizer_posix_libcdep.cpp
 		../../sanitizer_common/sanitizer_procmaps_mac.cpp
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 949beac1c5513..3354546c2a107 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -439,65 +439,61 @@ void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
   ExtractTagFromStack(stk, tag);
 }
 
-static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
-                             uptr addr_min, uptr addr_max) {
-  bool equal_stack = false;
-  RacyStacks hash;
-  bool equal_address = false;
-  RacyAddress ra0 = {addr_min, addr_max};
-  {
-    ReadLock lock(&ctx->racy_mtx);
-    if (flags()->suppress_equal_stacks) {
-      hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
-      hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
-      for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
-        if (hash == ctx->racy_stacks[i]) {
-          VPrintf(2,
-              "ThreadSanitizer: suppressing report as doubled (stack)\n");
-          equal_stack = true;
-          break;
-        }
-      }
-    }
-    if (flags()->suppress_equal_addresses) {
-      for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
-        RacyAddress ra2 = ctx->racy_addresses[i];
-        uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
-        uptr minend = min(ra0.addr_max, ra2.addr_max);
-        if (maxbeg < minend) {
-          VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
-          equal_address = true;
-          break;
-        }
-      }
+static bool FindRacyStacks(const RacyStacks &hash) {
+  for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
+    if (hash == ctx->racy_stacks[i]) {
+      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (stack)\n");
+      return true;
     }
   }
-  if (!equal_stack && !equal_address)
+  return false;
+}
+
+static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2]) {
+  if (!flags()->suppress_equal_stacks)
     return false;
-  if (!equal_stack) {
-    Lock lock(&ctx->racy_mtx);
-    ctx->racy_stacks.PushBack(hash);
-  }
-  if (!equal_address) {
-    Lock lock(&ctx->racy_mtx);
-    ctx->racy_addresses.PushBack(ra0);
+  RacyStacks hash;
+  hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
+  hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (FindRacyStacks(hash))
+      return true;
   }
-  return true;
+  Lock lock(&ctx->racy_mtx);
+  if (FindRacyStacks(hash))
+    return true;
+  ctx->racy_stacks.PushBack(hash);
+  return false;
 }
 
-static void AddRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
-                          uptr addr_min, uptr addr_max) {
-  Lock lock(&ctx->racy_mtx);
-  if (flags()->suppress_equal_stacks) {
-    RacyStacks hash;
-    hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
-    hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
-    ctx->racy_stacks.PushBack(hash);
+static bool FindRacyAddress(const RacyAddress &ra0) {
+  for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
+    RacyAddress ra2 = ctx->racy_addresses[i];
+    uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
+    uptr minend = min(ra0.addr_max, ra2.addr_max);
+    if (maxbeg < minend) {
+      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
+      return true;
+    }
   }
-  if (flags()->suppress_equal_addresses) {
-    RacyAddress ra0 = {addr_min, addr_max};
-    ctx->racy_addresses.PushBack(ra0);
+  return false;
+}
+
+static bool HandleRacyAddress(ThreadState *thr, uptr addr_min, uptr addr_max) {
+  if (!flags()->suppress_equal_addresses)
+    return false;
+  RacyAddress ra0 = {addr_min, addr_max};
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (FindRacyAddress(ra0))
+      return true;
   }
+  Lock lock(&ctx->racy_mtx);
+  if (FindRacyAddress(ra0))
+    return true;
+  ctx->racy_addresses.PushBack(ra0);
+  return false;
 }
 
 bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
@@ -618,6 +614,8 @@ void ReportRace(ThreadState *thr) {
     if (IsExpectedReport(addr_min, addr_max - addr_min))
       return;
   }
+  if (HandleRacyAddress(thr, addr_min, addr_max))
+    return;
 
   ReportType typ = ReportTypeRace;
   if (thr->is_vptr_access && freed)
@@ -668,7 +666,7 @@ void ReportRace(ThreadState *thr) {
   if (IsFiredSuppression(ctx, typ, traces[1]))
     return;
 
-  if (HandleRacyStacks(thr, traces, addr_min, addr_max))
+  if (HandleRacyStacks(thr, traces))
     return;
 
   // If any of the accesses has a tag, treat this as an "external" race.
@@ -711,7 +709,6 @@ void ReportRace(ThreadState *thr) {
   if (!OutputReport(thr, rep))
     return;
 
-  AddRacyStacks(thr, traces, addr_min, addr_max);
 }
 
 void PrintCurrentStack(ThreadState *thr, uptr pc) {
diff --git a/compiler-rt/lib/tsan/tests/rtl/tsan_test_util_posix.cpp b/compiler-rt/lib/tsan/tests/rtl/tsan_test_util_posix.cpp
index a24d04f470073..733e5d282a379 100644
--- a/compiler-rt/lib/tsan/tests/rtl/tsan_test_util_posix.cpp
+++ b/compiler-rt/lib/tsan/tests/rtl/tsan_test_util_posix.cpp
@@ -27,6 +27,8 @@
 #include 
 #include 
 
+#define CALLERPC (__builtin_return_address(0))
+
 using namespace __tsan;
 
 static __thread bool expect_report;
@@ -249,22 +251,42 @@ void ScopedThread::Impl::HandleEvent(Event *ev) {
   switch (ev->type) {
   case Event::READ:
   case Event::WRITE: {
-    void (*tsan_mop)(void *addr) = 0;
+    void (*tsan_mop)(void *addr, void *pc) = 0;
     if (ev->type == Event::READ) {
       switch (ev->arg /*size*/) {
-        case 1: tsan_mop = __tsan_read1; break;
-        case 2: tsan_mop = __tsan_read2; break;
-        case 4: tsan_mop = __tsan_read4; break;
-        case 8: tsan_mop = __tsan_read8; break;
-        case 16: tsan_mop = __tsan_read16; break;
+        case 1:
+          tsan_mop = __tsan_read1_pc;
+          break;
+        case 2:
+          tsan_mop = __tsan_read2_pc;
+          break;
+        case 4:
+          tsan_mop = __tsan_read4_pc;
+          break;
+        case 8:
+          tsan_mop = __tsan_read8_pc;
+          break;
+        case 16:
+          tsan_mop = __tsan_read16_pc;
+          break;
       }
     } else {
       switch (ev->arg /*size*/) {
-        case 1: tsan_mop = __tsan_write1; break;
-        case 2: tsan_mop = __tsan_write2; break;
-        case 4: tsan_mop = __tsan_write4; break;
-        case 8: tsan_mop = __tsan_write8; break;
-        case 16: tsan_mop = __tsan_write16; break;
+        case 1:
+          tsan_mop = __tsan_write1_pc;
+          break;
+        case 2:
+          tsan_mop = __tsan_write2_pc;
+          break;
+        case 4:
+          tsan_mop = __tsan_write4_pc;
+          break;
+        case 8:
+          tsan_mop = __tsan_write8_pc;
+          break;
+        case 16:
+          tsan_mop = __tsan_write16_pc;
+          break;
       }
     }
     CHECK_NE(tsan_mop, 0);
@@ -274,7 +296,7 @@ void ScopedThread::Impl::HandleEvent(Event *ev) {
     const int ErrCode = ECHRNG;
 #endif
     errno = ErrCode;
-    tsan_mop(ev->ptr);
+    tsan_mop(ev->ptr, (void *)ev->arg2);
     CHECK_EQ(ErrCode, errno);  // In no case must errno be changed.
     break;
   }
@@ -327,7 +349,7 @@ void ScopedThread::Impl::HandleEvent(Event *ev) {
 }
 
 void *ScopedThread::Impl::ScopedThreadCallback(void *arg) {
-  __tsan_func_entry(__builtin_return_address(0));
+  __tsan_func_entry(CALLERPC);
   Impl *impl = (Impl*)arg;
   for (;;) {
     Event* ev = (Event*)atomic_load(&impl->event, memory_order_acquire);
@@ -392,7 +414,8 @@ void ScopedThread::Detach() {
 
 void ScopedThread::Access(void *addr, bool is_write,
                           int size, bool expect_race) {
-  Event event(is_write ? Event::WRITE : Event::READ, addr, size);
+  Event event(is_write ? Event::WRITE : Event::READ, addr, size,
+              (uptr)CALLERPC);
   if (expect_race)
     event.ExpectReport(ReportTypeRace);
   impl_->send(&event);
diff --git a/compiler-rt/lib/ubsan/ubsan_checks.inc b/compiler-rt/lib/ubsan/ubsan_checks.inc
index 2c1529a7d92c5..846cd89ee19f8 100644
--- a/compiler-rt/lib/ubsan/ubsan_checks.inc
+++ b/compiler-rt/lib/ubsan/ubsan_checks.inc
@@ -37,6 +37,7 @@ UBSAN_CHECK(IntegerDivideByZero, "integer-divide-by-zero",
             "integer-divide-by-zero")
 UBSAN_CHECK(FloatDivideByZero, "float-divide-by-zero", "float-divide-by-zero")
 UBSAN_CHECK(InvalidBuiltin, "invalid-builtin-use", "invalid-builtin-use")
+UBSAN_CHECK(InvalidObjCCast, "invalid-objc-cast", "invalid-objc-cast")
 UBSAN_CHECK(ImplicitUnsignedIntegerTruncation,
             "implicit-unsigned-integer-truncation",
             "implicit-unsigned-integer-truncation")
diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
index 7f6a46fb6cf08..e201e6bba2207 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
@@ -16,6 +16,7 @@
 #include "ubsan_diag.h"
 #include "ubsan_flags.h"
 #include "ubsan_monitor.h"
+#include "ubsan_value.h"
 
 #include "sanitizer_common/sanitizer_common.h"
 
@@ -640,6 +641,36 @@ void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) {
   Die();
 }
 
+static void handleInvalidObjCCast(InvalidObjCCast *Data, ValueHandle Pointer,
+                                  ReportOptions Opts) {
+  SourceLocation Loc = Data->Loc.acquire();
+  ErrorType ET = ErrorType::InvalidObjCCast;
+
+  if (ignoreReport(Loc, Opts, ET))
+    return;
+
+  ScopedReport R(Opts, Loc, ET);
+
+  const char *GivenClass = getObjCClassName(Pointer);
+  const char *GivenClassStr = GivenClass ? GivenClass : "";
+
+  Diag(Loc, DL_Error, ET,
+       "invalid ObjC cast, object is a '%0', but expected a %1")
+      << GivenClassStr << Data->ExpectedType;
+}
+
+void __ubsan::__ubsan_handle_invalid_objc_cast(InvalidObjCCast *Data,
+                                               ValueHandle Pointer) {
+  GET_REPORT_OPTIONS(false);
+  handleInvalidObjCCast(Data, Pointer, Opts);
+}
+void __ubsan::__ubsan_handle_invalid_objc_cast_abort(InvalidObjCCast *Data,
+                                                     ValueHandle Pointer) {
+  GET_REPORT_OPTIONS(true);
+  handleInvalidObjCCast(Data, Pointer, Opts);
+  Die();
+}
+
 static void handleNonNullReturn(NonNullReturnData *Data, SourceLocation *LocPtr,
                                 ReportOptions Opts, bool IsAttr) {
   if (!LocPtr)
diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h
index 22ca96422381c..219fb15de55fe 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.h
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.h
@@ -168,6 +168,14 @@ struct InvalidBuiltinData {
 /// Handle a builtin called in an invalid way.
 RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data)
 
+struct InvalidObjCCast {
+  SourceLocation Loc;
+  const TypeDescriptor &ExpectedType;
+};
+
+/// Handle an invalid ObjC cast.
+RECOVERABLE(invalid_objc_cast, InvalidObjCCast *Data, ValueHandle Pointer)
+
 struct NonNullReturnData {
   SourceLocation AttrLoc;
 };
diff --git a/compiler-rt/lib/ubsan/ubsan_interface.inc b/compiler-rt/lib/ubsan/ubsan_interface.inc
index 1e44bc2171ded..94337d85017b4 100644
--- a/compiler-rt/lib/ubsan/ubsan_interface.inc
+++ b/compiler-rt/lib/ubsan/ubsan_interface.inc
@@ -27,6 +27,8 @@ INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion)
 INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion_abort)
 INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin)
 INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin_abort)
+INTERFACE_FUNCTION(__ubsan_handle_invalid_objc_cast)
+INTERFACE_FUNCTION(__ubsan_handle_invalid_objc_cast_abort)
 INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value)
 INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value_abort)
 INTERFACE_FUNCTION(__ubsan_handle_missing_return)
diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp
index 60f0b5c993482..79c3ba991d398 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -16,9 +16,57 @@
 #include "ubsan_value.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+// TODO(dliew): Prefer '__APPLE__' here over 'SANITIZER_MAC', as the latter is
+// unclear. rdar://58124919 tracks using a more obviously portable guard.
+#if defined(__APPLE__)
+#include 
+#endif
 
 using namespace __ubsan;
 
+typedef const char *(*ObjCGetClassNameTy)(void *);
+
+const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
+#if defined(__APPLE__)
+  // We need to query the ObjC runtime for some information, but do not want
+  // to introduce a static dependency from the ubsan runtime onto ObjC. Try to
+  // grab a handle to the ObjC runtime used by the process.
+  static bool AttemptedDlopen = false;
+  static void *ObjCHandle = nullptr;
+  static void *ObjCObjectGetClassName = nullptr;
+
+  // Prevent threads from racing to dlopen().
+  static __sanitizer::StaticSpinMutex Lock;
+  {
+    __sanitizer::SpinMutexLock Guard(&Lock);
+
+    if (!AttemptedDlopen) {
+      ObjCHandle = dlopen(
+          "/usr/lib/libobjc.A.dylib",
+          RTLD_LAZY         // Only bind symbols when used.
+              | RTLD_LOCAL  // Only make symbols available via the handle.
+              | RTLD_NOLOAD // Do not load the dylib, just grab a handle if the
+                            // image is already loaded.
+              | RTLD_FIRST  // Only search the image pointed-to by the handle.
+      );
+      AttemptedDlopen = true;
+      if (!ObjCHandle)
+        return nullptr;
+      ObjCObjectGetClassName = dlsym(ObjCHandle, "object_getClassName");
+    }
+  }
+
+  if (!ObjCObjectGetClassName)
+    return nullptr;
+
+  return ObjCGetClassNameTy(ObjCObjectGetClassName)((void *)Pointer);
+#else
+  return nullptr;
+#endif
+}
+
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
   if (isInlineInt()) {
diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
index a216e3a147e91..e0957276dd241 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.h
+++ b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -135,6 +135,9 @@ class TypeDescriptor {
 /// \brief An opaque handle to a value.
 typedef uptr ValueHandle;
 
+/// Returns the class name of the given ObjC object, or null if the name
+/// cannot be found.
+const char *getObjCClassName(ValueHandle Pointer);
 
 /// \brief Representation of an operand value provided by the instrumented code.
 ///
diff --git a/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp b/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
index ed62ddd0fa348..8654c705cfbb0 100644
--- a/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
+++ b/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
@@ -109,6 +109,7 @@ HANDLER(vla_bound_not_positive, "vla-bound-not-positive")
 HANDLER(float_cast_overflow, "float-cast-overflow")
 HANDLER(load_invalid_value, "load-invalid-value")
 HANDLER(invalid_builtin, "invalid-builtin")
+HANDLER(invalid_objc_cast, "invalid-objc-cast")
 HANDLER(function_type_mismatch, "function-type-mismatch")
 HANDLER(implicit_conversion, "implicit-conversion")
 HANDLER(nonnull_arg, "nonnull-arg")
diff --git a/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c b/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c
index 9778e59184048..fb19e0c601919 100644
--- a/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c
+++ b/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c
@@ -1,6 +1,5 @@
 // REQUIRES: arm-target-arch || armv6m-target-arch
-// RUN: %arm_call_apsr -o %t.aspr.o
-// RUN: %clang_builtins %s %t.aspr.o %librt -o %t && %run %t
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 
 #include 
 #include 
diff --git a/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmple_test.c b/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmple_test.c
index 1feeac5ffd7e0..7dbf2acb28726 100644
--- a/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmple_test.c
+++ b/compiler-rt/test/builtins/Unit/arm/aeabi_cdcmple_test.c
@@ -1,6 +1,5 @@
 // REQUIRES: arm-target-arch || armv6m-target-arch
-// RUN: %arm_call_apsr -o %t.aspr.o
-// RUN: %clang_builtins %s  %t.aspr.o %librt -o %t && %run %t
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 
 #include 
 #include 
diff --git a/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c b/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c
index 8da56071364b7..bfd67d1eee4d5 100644
--- a/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c
+++ b/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c
@@ -1,6 +1,5 @@
 // REQUIRES: arm-target-arch || armv6m-target-arch
-// RUN: %arm_call_apsr -o %t.aspr.o
-// RUN: %clang_builtins %s  %t.aspr.o %librt -o %t && %run %t
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 
 #include 
 #include 
diff --git a/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmple_test.c b/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmple_test.c
index a9358c4cf5f77..d80c45d224473 100644
--- a/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmple_test.c
+++ b/compiler-rt/test/builtins/Unit/arm/aeabi_cfcmple_test.c
@@ -1,6 +1,5 @@
 // REQUIRES: arm-target-arch || armv6m-target-arch
-// RUN: %arm_call_apsr -o %t.aspr.o
-// RUN: %clang_builtins %s  %t.aspr.o %librt -o %t && %run %t
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 
 #include 
 #include 
diff --git a/compiler-rt/test/builtins/Unit/arm/call_apsr.S b/compiler-rt/test/builtins/Unit/arm/call_apsr.S
deleted file mode 100644
index 116f930c6e1e7..0000000000000
--- a/compiler-rt/test/builtins/Unit/arm/call_apsr.S
+++ /dev/null
@@ -1,29 +0,0 @@
-#include "../../../../lib/builtins/assembly.h"
-
-.syntax unified
-// __attribute__((pcs("aapcs")))
-// int32_t call_apsr_d(double a, double b, void(*fn)(double, double)) {
-//   fn(a, b);
-//   return apsr;
-// }
-
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(call_apsr_d)
-    push {r7, lr}
-    ldr r7, [sp, #8]
-    blx r7
-    mrs r0, apsr
-    pop {r7, pc}
-END_COMPILERRT_FUNCTION(call_apsr_d)
-
-// __attribute__((pcs("aapcs")))
-// int32_t call_apsr_f(float a, float b, void(*fn)(float, float)) {
-//   fn(a, b);
-//   return apsr;
-// }
-
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(call_apsr_f)
-    push {lr}
-    blx r2
-    mrs r0, apsr
-    pop {pc}
-END_COMPILERRT_FUNCTION(call_apsr_f)
diff --git a/compiler-rt/test/builtins/Unit/arm/call_apsr.h b/compiler-rt/test/builtins/Unit/arm/call_apsr.h
index fa81e892e3f00..87a7a74cb2a5e 100644
--- a/compiler-rt/test/builtins/Unit/arm/call_apsr.h
+++ b/compiler-rt/test/builtins/Unit/arm/call_apsr.h
@@ -16,10 +16,22 @@ union cpsr {
     uint32_t value;
 };
 
-extern __attribute__((pcs("aapcs")))
-uint32_t call_apsr_f(float a, float b, __attribute__((pcs("aapcs"))) void (*fn)(float, float));
+__attribute__((noinline, pcs("aapcs"))) static uint32_t call_apsr_f(float a, float b,
+                                                                    __attribute__((pcs("aapcs"))) void (*fn)(float, float)) {
+  uint32_t result;
+  fn(a, b);
+  asm volatile("mrs %0, apsr"
+               : "=r"(result));
+  return result;
+}
 
-extern __attribute__((pcs("aapcs")))
-uint32_t call_apsr_d(double a, double b, __attribute__((pcs("aapcs"))) void (*fn)(double, double));
+__attribute__((noinline, pcs("aapcs"))) static uint32_t call_apsr_d(double a, double b,
+                                                                    __attribute__((pcs("aapcs"))) void (*fn)(double, double)) {
+  uint32_t result;
+  fn(a, b);
+  asm volatile("mrs %0, apsr"
+               : "=r"(result));
+  return result;
+}
 
 #endif // CALL_APSR_H
diff --git a/compiler-rt/test/builtins/Unit/lit.cfg.py b/compiler-rt/test/builtins/Unit/lit.cfg.py
index c8888078be507..fa6dc86783d3e 100644
--- a/compiler-rt/test/builtins/Unit/lit.cfg.py
+++ b/compiler-rt/test/builtins/Unit/lit.cfg.py
@@ -87,10 +87,6 @@ def build_invocation(compile_flags):
   return " " + " ".join([clang_wrapper, config.clang] + compile_flags) + " "
 
 
-target_arch = config.target_arch
-if (target_arch == "arm"):
-  target_arch = "armv7"
-
 config.substitutions.append( ("%clang ", build_invocation(target_cflags)) )
 config.substitutions.append( ("%clangxx ", build_invocation(target_cxxflags)) )
 config.substitutions.append( ("%clang_builtins ", \
@@ -98,14 +94,6 @@ def build_invocation(compile_flags):
 config.substitutions.append( ("%clangxx_builtins ", \
                               build_invocation(clang_builtins_cxxflags)))
 
-# FIXME: move the call_apsr.s into call_apsr.h as inline-asm.
-# some ARM tests needs call_apsr.s
-call_apsr_source = os.path.join(builtins_lit_source_dir, 'arm', 'call_apsr.S')
-march_flag = '-march=' + target_arch
-call_apsr_flags = ['-c', march_flag, call_apsr_source]
-config.substitutions.append( ("%arm_call_apsr ", \
-                              build_invocation(call_apsr_flags)) )
-
 # Default test suffixes.
 config.suffixes = ['.c', '.cpp']
 
diff --git a/compiler-rt/test/gwp_asan/CMakeLists.txt b/compiler-rt/test/gwp_asan/CMakeLists.txt
index 95796521f31dc..de53ad5b7eed4 100644
--- a/compiler-rt/test/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/test/gwp_asan/CMakeLists.txt
@@ -19,12 +19,8 @@ if (COMPILER_RT_INCLUDE_TESTS AND COMPILER_RT_HAS_GWP_ASAN AND NOT ANDROID)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/unit/lit.site.cfg.py.in
     ${CMAKE_CURRENT_BINARY_DIR}/unit/lit.site.cfg.py)
-  add_lit_testsuite(check-gwp_asan-unit "Running GWP-ASan unit tests"
-    ${CMAKE_CURRENT_BINARY_DIR}/unit
-    DEPENDS ${GWP_ASAN_TEST_DEPS})
-  set_target_properties(check-gwp_asan-unit PROPERTIES FOLDER
-    "Compiler-RT Tests")
-    list(APPEND GWP_ASAN_TEST_DEPS check-gwp_asan-unit)
+  list(APPEND GWP_ASAN_TEST_DEPS GwpAsanUnitTests)
+  list(APPEND GWP_ASAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/unit)
 endif()
 
 configure_lit_site_cfg(
diff --git a/compiler-rt/test/gwp_asan/backtrace.c b/compiler-rt/test/gwp_asan/backtrace.c
new file mode 100644
index 0000000000000..0ba32f85cbf97
--- /dev/null
+++ b/compiler-rt/test/gwp_asan/backtrace.c
@@ -0,0 +1,29 @@
+// REQUIRES: gwp_asan
+// RUN: %clang_gwp_asan %s -g -o %t
+// RUN: %expect_crash %t 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+
+__attribute__((noinline)) void *allocate_mem() { return malloc(1); }
+
+__attribute__((noinline)) void free_mem(void *ptr) { free(ptr); }
+
+__attribute__((noinline)) void touch_mem(void *ptr) {
+  volatile char sink = *((volatile char *)ptr);
+}
+
+// CHECK: Use After Free
+// CHECK: touch_mem
+// CHECK: was deallocated
+// CHECK: free_mem
+// CHECK: was allocated
+// CHECK: allocate_mem
+
+int main() {
+  for (unsigned i = 0; i < 0x10000; ++i) {
+    void *ptr = allocate_mem();
+    free_mem(ptr);
+    touch_mem(ptr);
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 9d0c214bd9a76..7c98c387c8705 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -23,9 +23,6 @@
     # bash on Windows is usually very slow.
     execute_external = (not sys.platform in ['win32'])
 
-# Allow expanding substitutions that are based on other substitutions
-config.recursiveExpansionLimit = 10
-
 # Setup test format.
 config.test_format = lit.formats.ShTest(execute_external)
 if execute_external:
@@ -70,6 +67,8 @@
     # to link. In r19 and later we just use the default which is libc++.
     config.cxx_mode_flags.append('-stdlib=libstdc++')
 
+config.environment = dict(os.environ)
+
 # Clear some environment variables that might affect Clang.
 possibly_dangerous_env_vars = ['ASAN_OPTIONS', 'DFSAN_OPTIONS', 'LSAN_OPTIONS',
                                'MSAN_OPTIONS', 'UBSAN_OPTIONS',
diff --git a/compiler-rt/test/lsan/lit.common.cfg.py b/compiler-rt/test/lsan/lit.common.cfg.py
index 6b699b274c637..1d393880af6a9 100644
--- a/compiler-rt/test/lsan/lit.common.cfg.py
+++ b/compiler-rt/test/lsan/lit.common.cfg.py
@@ -70,7 +70,7 @@ def build_invocation(compile_flags):
 config.substitutions.append( ("%clangxx_lsan ", build_invocation(clang_lsan_cxxflags)) )
 
 # LeakSanitizer tests are currently supported on x86-64 Linux, PowerPC64 Linux, arm Linux, mips64 Linux, s390x Linux and x86_64 Darwin.
-supported_linux = config.host_os is 'Linux' and config.host_arch in ['x86_64', 'ppc64', 'ppc64le', 'mips64', 'arm', 'armhf', 'armv7l', 's390x']
+supported_linux = config.host_os == 'Linux' and config.host_arch in ['x86_64', 'ppc64', 'ppc64le', 'mips64', 'arm', 'armhf', 'armv7l', 's390x']
 supported_darwin = config.host_os == 'Darwin' and config.target_arch in ['x86_64']
 supported_netbsd = config.host_os == 'NetBSD' and config.target_arch in ['x86_64', 'i386']
 if not (supported_linux or supported_darwin or supported_netbsd):
diff --git a/compiler-rt/test/msan/Linux/sigandorset.cpp b/compiler-rt/test/msan/Linux/sigandorset.cpp
new file mode 100644
index 0000000000000..da983020a4c68
--- /dev/null
+++ b/compiler-rt/test/msan/Linux/sigandorset.cpp
@@ -0,0 +1,28 @@
+// RUN: %clangxx_msan -std=c++11 -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_msan -DLEFT_OK -std=c++11 -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_msan -DRIGHT_OK -std=c++11 -O0 -g %s -o %t && not %run %t 2<&1 | FileCheck %s
+// RUN: %clangxx_msan -DLEFT_OK -DRIGHT_OK -std=c++11 -O0 -g %s -o %t && %run %t
+// REQUIRES: !android
+
+#include <sanitizer/msan_interface.h>
+#include <assert.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(void) {
+  sigset_t s, t, u;
+#ifdef LEFT_OK
+  sigemptyset(&t);
+#endif
+#ifdef RIGHT_OK
+  sigemptyset(&u);
+#endif
+
+  // CHECK:  MemorySanitizer: use-of-uninitialized-value
+  // CHECK-NEXT: in main {{.*}}sigandorset.cpp:[[@LINE+1]]
+  sigandset(&s, &t, &u);
+  sigorset(&s, &t, &u);
+  __msan_check_mem_is_initialized(&s, sizeof s);
+  return 0;
+}
diff --git a/compiler-rt/test/msan/__strxfrm_l.cpp b/compiler-rt/test/msan/__strxfrm_l.cpp
index c4eb10efb3e0b..9766d33056857 100644
--- a/compiler-rt/test/msan/__strxfrm_l.cpp
+++ b/compiler-rt/test/msan/__strxfrm_l.cpp
@@ -10,7 +10,7 @@
 extern "C" decltype(strxfrm_l) __strxfrm_l;
 
 int main(void) {
-  char q[10];
+  char q[100];
   locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0);
   size_t n = __strxfrm_l(q, "qwerty", sizeof(q), loc);
   assert(n < sizeof(q));
diff --git a/compiler-rt/test/msan/strxfrm.cpp b/compiler-rt/test/msan/strxfrm.cpp
index 94b8c70240907..b4fee6f55c4c5 100644
--- a/compiler-rt/test/msan/strxfrm.cpp
+++ b/compiler-rt/test/msan/strxfrm.cpp
@@ -12,7 +12,7 @@ int main(void) {
   assert(n < sizeof(q));
   __msan_check_mem_is_initialized(q, n + 1);
 
-  locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0);
+  locale_t loc = newlocale(LC_ALL_MASK, "C", (locale_t)0);
 
   __msan_poison(&q, sizeof(q));
   n = strxfrm_l(q, "qwerty", sizeof(q), loc);
diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c
index 3f4a4f6cc6a63..416b90384c7d2 100644
--- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c
+++ b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c
@@ -46,21 +46,21 @@ int main(int argc, char *argv[]) {
 #endif
 
   dlerror();
-  void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush");
-  if (gcov_flush1 == NULL) {
-    fprintf(stderr, "unable to find __gcov_flush in func.shared': %s\n", dlerror());
+  void (*gcov_reset1)() = (void (*)())dlsym(f1_handle, "__gcov_reset");
+  if (gcov_reset1 == NULL) {
+    fprintf(stderr, "unable to find __gcov_reset in 'func.shared': %s\n", dlerror());
     return EXIT_FAILURE;
   }
 
   dlerror();
-  void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush");
-  if (gcov_flush2 == NULL) {
-    fprintf(stderr, "unable to find __gcov_flush in func2.shared': %s\n", dlerror());
+  void (*gcov_reset2)() = (void (*)())dlsym(f2_handle, "__gcov_reset");
+  if (gcov_reset2 == NULL) {
+    fprintf(stderr, "unable to find __gcov_reset in 'func2.shared': %s\n", dlerror());
     return EXIT_FAILURE;
   }
 
-  if (gcov_flush1 == gcov_flush2) {
-    fprintf(stderr, "Same __gcov_flush found in func.shared and func2.shared\n");
+  if (gcov_reset1 == gcov_reset2) {
+    fprintf(stderr, "Same __gcov_reset found in func.shared and func2.shared\n");
     return EXIT_FAILURE;
   }
 
diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov
deleted file mode 100644
index 2d538f63eb46b..0000000000000
--- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov
+++ /dev/null
@@ -1,91 +0,0 @@
-// CHECK:        -:    0:Source:{{.*}}Inputs/instrprof-dlopen-dlclose-main.c
-// CHECK-NEXT:        -:    0:Graph:instrprof-dlopen-dlclose-main.gcno
-// CHECK-NEXT:        -:    0:Data:instrprof-dlopen-dlclose-main.gcda
-// CHECK-NEXT:        -:    0:Runs:1
-// CHECK-NEXT:        -:    0:Programs:1
-// CHECK-NEXT:        -:    1:#include 
-// CHECK-NEXT:        -:    2:#include 
-// CHECK-NEXT:        -:    3:#include 
-// CHECK-NEXT:        -:    4:
-// CHECK-NEXT:        1:    5:int main(int argc, char *argv[]) {
-// CHECK-NEXT:        1:    6:  dlerror();
-// CHECK-NEXT:        1:    7:  void *f1_handle = dlopen("func.shared", RTLD_LAZY | RTLD_GLOBAL);
-// CHECK-NEXT:        1:    8:  if (f1_handle == NULL) {
-// CHECK-NEXT:    #####:    9:    fprintf(stderr, "unable to open 'func.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   10:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   11:  }
-// CHECK-NEXT:        -:   12:
-// CHECK-NEXT:        1:   13:  void (*func)(void) = (void (*)(void))dlsym(f1_handle, "func");
-// CHECK-NEXT:        1:   14:  if (func == NULL) {
-// CHECK-NEXT:    #####:   15:    fprintf(stderr, "unable to lookup symbol 'func': %s\n", dlerror());
-// CHECK-NEXT:    #####:   16:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   17:  }
-// CHECK-NEXT:        -:   18:
-// CHECK-NEXT:        1:   19:  dlerror();
-// CHECK-NEXT:        1:   20:  void *f2_handle = dlopen("func2.shared", RTLD_LAZY | RTLD_GLOBAL);
-// CHECK-NEXT:        1:   21:  if (f2_handle == NULL) {
-// CHECK-NEXT:    #####:   22:    fprintf(stderr, "unable to open 'func2.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   23:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   24:  }
-// CHECK-NEXT:        -:   25:
-// CHECK-NEXT:        1:   26:  void (*func2)(void) = (void (*)(void))dlsym(f2_handle, "func2");
-// CHECK-NEXT:        1:   27:  if (func2 == NULL) {
-// CHECK-NEXT:    #####:   28:    fprintf(stderr, "unable to lookup symbol 'func2': %s\n", dlerror());
-// CHECK-NEXT:    #####:   29:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   30:  }
-// CHECK-NEXT:        1:   31:  func2();
-// CHECK-NEXT:        -:   32:
-// CHECK-NEXT:        -:   33:#ifdef USE_LIB3
-// CHECK-NEXT:        -:   34:  void *f3_handle = dlopen("func3.shared", RTLD_LAZY | RTLD_GLOBAL);
-// CHECK-NEXT:        -:   35:  if (f3_handle == NULL) {
-// CHECK-NEXT:        -:   36:    fprintf(stderr, "unable to open 'func3.shared': %s\n", dlerror());
-// CHECK-NEXT:        -:   37:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   38:  }
-// CHECK-NEXT:        -:   39:
-// CHECK-NEXT:        -:   40:  void (*func3)(void) = (void (*)(void))dlsym(f3_handle, "func3");
-// CHECK-NEXT:        -:   41:  if (func3 == NULL) {
-// CHECK-NEXT:        -:   42:    fprintf(stderr, "unable to lookup symbol 'func3': %s\n", dlerror());
-// CHECK-NEXT:        -:   43:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   44:  }
-// CHECK-NEXT:        -:   45:  func3();
-// CHECK-NEXT:        -:   46:#endif
-// CHECK-NEXT:        -:   47:
-// CHECK-NEXT:        1:   48:  dlerror();
-// CHECK-NEXT:        1:   49:  void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush");
-// CHECK-NEXT:        1:   50:  if (gcov_flush1 == NULL) {
-// CHECK-NEXT:    #####:   51:    fprintf(stderr, "unable to find __gcov_flush in func.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   52:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   53:  }
-// CHECK-NEXT:        -:   54:
-// CHECK-NEXT:        1:   55:  dlerror();
-// CHECK-NEXT:        1:   56:  void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush");
-// CHECK-NEXT:        1:   57:  if (gcov_flush2 == NULL) {
-// CHECK-NEXT:    #####:   58:    fprintf(stderr, "unable to find __gcov_flush in func2.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   59:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   60:  }
-// CHECK-NEXT:        -:   61:
-// CHECK-NEXT:        1:   62:  if (gcov_flush1 == gcov_flush2) {
-// CHECK-NEXT:    #####:   63:    fprintf(stderr, "Same __gcov_flush found in func.shared and func2.shared\n");
-// CHECK-NEXT:    #####:   64:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   65:  }
-// CHECK-NEXT:        -:   66:
-// CHECK-NEXT:        1:   67:  dlerror();
-// CHECK-NEXT:        1:   68:  if (dlclose(f2_handle) != 0) {
-// CHECK-NEXT:    #####:   69:    fprintf(stderr, "unable to close 'func2.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   70:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   71:  }
-// CHECK-NEXT:        -:   72:
-// CHECK-NEXT:        1:   73:  func();
-// CHECK-NEXT:        -:   74:
-// CHECK-NEXT:        1:   75:  int g1 = 0;
-// CHECK-NEXT:        1:   76:  int g2 = 0;
-// CHECK-NEXT:        1:   77:  int n = 10;
-// CHECK-NEXT:        -:   78:
-// CHECK-NEXT:        1:   79:  if (n % 5 == 0)
-// CHECK-NEXT:        1:   80:    g1++;
-// CHECK-NEXT:        -:   81:  else
-// CHECK-NEXT:    #####:   82:    g2++;
-// CHECK-NEXT:        -:   83:
-// CHECK-NEXT:        1:   84:  return EXIT_SUCCESS;
-// CHECK-NEXT:        1:   85:}
-// CHECK-NEXT:        -:   86:
diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov
deleted file mode 100644
index f1dd1757144f4..0000000000000
--- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov
+++ /dev/null
@@ -1,91 +0,0 @@
-// CHECK:        -:    0:Source:{{.*}}Inputs/instrprof-dlopen-dlclose-main.c
-// CHECK-NEXT:        -:    0:Graph:instrprof-dlopen-dlclose-main.gcno
-// CHECK-NEXT:        -:    0:Data:instrprof-dlopen-dlclose-main.gcda
-// CHECK-NEXT:        -:    0:Runs:1
-// CHECK-NEXT:        -:    0:Programs:1
-// CHECK-NEXT:        -:    1:#include 
-// CHECK-NEXT:        -:    2:#include 
-// CHECK-NEXT:        -:    3:#include 
-// CHECK-NEXT:        -:    4:
-// CHECK-NEXT:        1:    5:int main(int argc, char *argv[]) {
-// CHECK-NEXT:        1:    6:  dlerror();
-// CHECK-NEXT:        1:    7:  void *f1_handle = dlopen("func.shared", RTLD_LAZY | RTLD_GLOBAL);
-// CHECK-NEXT:        1:    8:  if (f1_handle == NULL) {
-// CHECK-NEXT:    #####:    9:    fprintf(stderr, "unable to open 'func.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   10:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   11:  }
-// CHECK-NEXT:        -:   12:
-// CHECK-NEXT:        1:   13:  void (*func)(void) = (void (*)(void))dlsym(f1_handle, "func");
-// CHECK-NEXT:        1:   14:  if (func == NULL) {
-// CHECK-NEXT:    #####:   15:    fprintf(stderr, "unable to lookup symbol 'func': %s\n", dlerror());
-// CHECK-NEXT:    #####:   16:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   17:  }
-// CHECK-NEXT:        -:   18:
-// CHECK-NEXT:        1:   19:  dlerror();
-// CHECK-NEXT:        1:   20:  void *f2_handle = dlopen("func2.shared", RTLD_LAZY | RTLD_GLOBAL);
-// CHECK-NEXT:        1:   21:  if (f2_handle == NULL) {
-// CHECK-NEXT:    #####:   22:    fprintf(stderr, "unable to open 'func2.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   23:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   24:  }
-// CHECK-NEXT:        -:   25:
-// CHECK-NEXT:        1:   26:  void (*func2)(void) = (void (*)(void))dlsym(f2_handle, "func2");
-// CHECK-NEXT:        1:   27:  if (func2 == NULL) {
-// CHECK-NEXT:    #####:   28:    fprintf(stderr, "unable to lookup symbol 'func2': %s\n", dlerror());
-// CHECK-NEXT:    #####:   29:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   30:  }
-// CHECK-NEXT:        1:   31:  func2();
-// CHECK-NEXT:        -:   32:
-// CHECK-NEXT:        -:   33:#ifdef USE_LIB3
-// CHECK-NEXT:        1:   34:  void *f3_handle = dlopen("func3.shared", RTLD_LAZY | RTLD_GLOBAL);
-// CHECK-NEXT:        1:   35:  if (f3_handle == NULL) {
-// CHECK-NEXT:    #####:   36:    fprintf(stderr, "unable to open 'func3.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   37:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   38:  }
-// CHECK-NEXT:        -:   39:
-// CHECK-NEXT:        1:   40:  void (*func3)(void) = (void (*)(void))dlsym(f3_handle, "func3");
-// CHECK-NEXT:        1:   41:  if (func3 == NULL) {
-// CHECK-NEXT:    #####:   42:    fprintf(stderr, "unable to lookup symbol 'func3': %s\n", dlerror());
-// CHECK-NEXT:    #####:   43:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   44:  }
-// CHECK-NEXT:        1:   45:  func3();
-// CHECK-NEXT:        -:   46:#endif
-// CHECK-NEXT:        -:   47:
-// CHECK-NEXT:        1:   48:  dlerror();
-// CHECK-NEXT:        1:   49:  void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush");
-// CHECK-NEXT:        1:   50:  if (gcov_flush1 == NULL) {
-// CHECK-NEXT:    #####:   51:    fprintf(stderr, "unable to find __gcov_flush in func.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   52:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   53:  }
-// CHECK-NEXT:        -:   54:
-// CHECK-NEXT:        1:   55:  dlerror();
-// CHECK-NEXT:        1:   56:  void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush");
-// CHECK-NEXT:        1:   57:  if (gcov_flush2 == NULL) {
-// CHECK-NEXT:    #####:   58:    fprintf(stderr, "unable to find __gcov_flush in func2.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   59:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   60:  }
-// CHECK-NEXT:        -:   61:
-// CHECK-NEXT:        1:   62:  if (gcov_flush1 == gcov_flush2) {
-// CHECK-NEXT:    #####:   63:    fprintf(stderr, "Same __gcov_flush found in func.shared and func2.shared\n");
-// CHECK-NEXT:    #####:   64:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   65:  }
-// CHECK-NEXT:        -:   66:
-// CHECK-NEXT:        1:   67:  dlerror();
-// CHECK-NEXT:        1:   68:  if (dlclose(f2_handle) != 0) {
-// CHECK-NEXT:    #####:   69:    fprintf(stderr, "unable to close 'func2.shared': %s\n", dlerror());
-// CHECK-NEXT:    #####:   70:    return EXIT_FAILURE;
-// CHECK-NEXT:        -:   71:  }
-// CHECK-NEXT:        -:   72:
-// CHECK-NEXT:        1:   73:  func();
-// CHECK-NEXT:        -:   74:
-// CHECK-NEXT:        1:   75:  int g1 = 0;
-// CHECK-NEXT:        1:   76:  int g2 = 0;
-// CHECK-NEXT:        1:   77:  int n = 10;
-// CHECK-NEXT:        -:   78:
-// CHECK-NEXT:        1:   79:  if (n % 5 == 0)
-// CHECK-NEXT:        1:   80:    g1++;
-// CHECK-NEXT:        -:   81:  else
-// CHECK-NEXT:    #####:   82:    g2++;
-// CHECK-NEXT:        -:   83:
-// CHECK-NEXT:        1:   84:  return EXIT_SUCCESS;
-// CHECK-NEXT:        1:   85:}
-// CHECK-NEXT:        -:   86:
diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov
deleted file mode 100644
index 3af4ec94a1b01..0000000000000
--- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov
+++ /dev/null
@@ -1,6 +0,0 @@
-// CHECK:        -:    0:Source:{{.*}}Inputs/instrprof-dlopen-func.c
-// CHECK-NEXT:        -:    0:Graph:instrprof-dlopen-func.gcno
-// CHECK-NEXT:        -:    0:Data:instrprof-dlopen-func.gcda
-// CHECK-NEXT:        -:    0:Runs:1
-// CHECK-NEXT:        -:    0:Programs:1
-// CHECK-NEXT:        1:    1:void func(int K) {}
diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov
deleted file mode 100644
index 7101f74b938d6..0000000000000
--- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov
+++ /dev/null
@@ -1,6 +0,0 @@
-// CHECK:        -:    0:Source:{{.*}}Inputs/instrprof-dlopen-func2.c
-// CHECK-NEXT:        -:    0:Graph:instrprof-dlopen-func2.gcno
-// CHECK-NEXT:        -:    0:Data:instrprof-dlopen-func2.gcda
-// CHECK-NEXT:        -:    0:Runs:1
-// CHECK-NEXT:        -:    0:Programs:1
-// CHECK-NEXT:        1:    1:void func2(int K) {}
diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov
deleted file mode 100644
index 7101f74b938d6..0000000000000
--- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov
+++ /dev/null
@@ -1,6 +0,0 @@
-// CHECK:        -:    0:Source:{{.*}}Inputs/instrprof-dlopen-func2.c
-// CHECK-NEXT:        -:    0:Graph:instrprof-dlopen-func2.gcno
-// CHECK-NEXT:        -:    0:Data:instrprof-dlopen-func2.gcda
-// CHECK-NEXT:        -:    0:Runs:1
-// CHECK-NEXT:        -:    0:Programs:1
-// CHECK-NEXT:        1:    1:void func2(int K) {}
diff --git a/compiler-rt/test/profile/Posix/gcov-dlopen.c b/compiler-rt/test/profile/Posix/gcov-dlopen.c
new file mode 100644
index 0000000000000..0212979e87ffd
--- /dev/null
+++ b/compiler-rt/test/profile/Posix/gcov-dlopen.c
@@ -0,0 +1,87 @@
+/// atexit(3) not supported in dlopen(3)ed+dlclose(3)d DSO
+// XFAIL: netbsd
+
+// RUN: mkdir -p %t.d && cd %t.d
+
+// RUN: echo 'void func1(int k) {}' > func1.c
+// RUN: echo 'void func2(int k) {}' > func2.c
+// RUN: echo 'void func3(int k) {}' > func3.c
+// RUN: %clang --coverage -fPIC -shared func1.c -o func1.so
+// RUN: %clang --coverage -fPIC -shared func2.c -o func2.so
+// RUN: %clang --coverage -fPIC -shared func3.c -o func3.so
+// RUN: %clang --coverage -fPIC -rpath %t.d %s -o %t
+
+/// Test with two dlopened libraries.
+// RUN: rm -f gcov-dlopen.gcda func1.gcda func2.gcda
+// RUN: %run %t
+// RUN: llvm-cov gcov -t gcov-dlopen.gcda | FileCheck %s
+// RUN: llvm-cov gcov -t func1.gcda | FileCheck %s --check-prefix=FUNC1
+// RUN: llvm-cov gcov -t func2.gcda | FileCheck %s --check-prefix=FUNC2
+
+// FUNC1:     1:    1:void func1(int k) {}
+// FUNC2:     1:    1:void func2(int k) {}
+
+/// Test with three dlopened libraries.
+// RUN: %clang -DUSE_LIB3 --coverage -fPIC -rpath %t.d %s -o %t
+// RUN: rm -f gcov-dlopen.gcda func1.gcda func2.gcda func3.gcda
+// RUN: %run %t
+// RUN: llvm-cov gcov -t gcov-dlopen.gcda | FileCheck %s --check-prefix=LIB3
+// RUN: llvm-cov gcov -t func1.gcda | FileCheck %s --check-prefix=FUNC1
+// RUN: llvm-cov gcov -t func2.gcda | FileCheck %s --check-prefix=FUNC2
+// RUN: llvm-cov gcov -t func3.gcda | FileCheck %s --check-prefix=FUNC3
+
+// FUNC3:     1:    1:void func3(int k) {}
+
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+  void *f1_handle = dlopen("func1.so", RTLD_LAZY | RTLD_GLOBAL);
+  if (f1_handle == NULL)
+    return fprintf(stderr, "unable to open 'func1.so': %s\n", dlerror());
+  void (*func1)(void) = (void (*)(void))dlsym(f1_handle, "func1");
+  if (func1 == NULL)
+    return fprintf(stderr, "unable to lookup symbol 'func1': %s\n", dlerror());
+
+  void *f2_handle = dlopen("func2.so", RTLD_LAZY | RTLD_GLOBAL);
+  if (f2_handle == NULL)
+    return fprintf(stderr, "unable to open 'func2.so': %s\n", dlerror());
+  void (*func2)(void) = (void (*)(void))dlsym(f2_handle, "func2");
+  if (func2 == NULL)
+    return fprintf(stderr, "unable to lookup symbol 'func2': %s\n", dlerror());
+  func2();
+
+#ifdef USE_LIB3
+// CHECK:          -: [[#@LINE+2]]:  void *f3_handle
+// LIB3:           1: [[#@LINE+1]]:  void *f3_handle
+  void *f3_handle = dlopen("func3.so", RTLD_LAZY | RTLD_GLOBAL);
+  if (f3_handle == NULL)
+    return fprintf(stderr, "unable to open 'func3.so': %s\n", dlerror());
+  void (*func3)(void) = (void (*)(void))dlsym(f3_handle, "func3");
+  if (func3 == NULL)
+    return fprintf(stderr, "unable to lookup symbol 'func3': %s\n", dlerror());
+  func3();
+#endif
+
+  void (*gcov_reset1)() = (void (*)())dlsym(f1_handle, "__gcov_reset");
+  if (gcov_reset1 == NULL)
+    return fprintf(stderr, "unable to find __gcov_reset in 'func1.so': %s\n", dlerror());
+  void (*gcov_reset2)() = (void (*)())dlsym(f2_handle, "__gcov_reset");
+  if (gcov_reset2 == NULL)
+    return fprintf(stderr, "unable to find __gcov_reset in 'func2.so': %s\n", dlerror());
+  if (gcov_reset1 == gcov_reset2)
+    return fprintf(stderr, "same __gcov_reset found in func1.so and func2.so\n");
+
+  /// Test that __gcov_dump is in the dynamic symbol table.
+  void (*gcov_dump1)() = (void (*)())dlsym(f1_handle, "__gcov_dump");
+  if (gcov_dump1 == NULL)
+    return fprintf(stderr, "unable to find __gcov_dump in 'func1.so': %s\n", dlerror());
+
+  if (dlclose(f2_handle) != 0)
+    return fprintf(stderr, "unable to close 'func2.so': %s\n", dlerror());
+
+  func1();
+
+  return 0;
+}
diff --git a/compiler-rt/test/profile/Posix/gcov-fork.c b/compiler-rt/test/profile/Posix/gcov-fork.c
index 4942d5ac92888..022ce716a8dcb 100644
--- a/compiler-rt/test/profile/Posix/gcov-fork.c
+++ b/compiler-rt/test/profile/Posix/gcov-fork.c
@@ -1,10 +1,6 @@
 /// A basic block with fork/exec* is split. .gcda is flushed immediately before
 /// fork/exec* so the lines before fork are counted once while succeeding
 /// lines are counted twice.
-// UNSUPPORTED: darwin
-/// FIXME: http://lab.llvm.org:8011/builders/clang-ppc64be-linux/builds/50913
-// UNSUPPORTED: host-byteorder-big-endian
-
 // RUN: mkdir -p %t.d && cd %t.d
 // RUN: %clang --coverage %s -o %t
 // RUN: test -f gcov-fork.gcno
@@ -17,8 +13,12 @@
 void func1() {}                    // CHECK:      1: [[#@LINE]]:void func1()
 void func2() {}                    // CHECK-NEXT: 2: [[#@LINE]]:
 int main(void) {                   // CHECK-NEXT: 1: [[#@LINE]]:
+  int status;                      // CHECK-NEXT: -: [[#@LINE]]:
   func1();                         // CHECK-NEXT: 1: [[#@LINE]]:
-  if (fork() == -1) return 1;      // CHECK-NEXT: 1: [[#@LINE]]:
+  pid_t pid = fork();              // CHECK-NEXT: 1: [[#@LINE]]:
+  if (pid == -1) return 1;         // CHECK-NEXT: 2: [[#@LINE]]:
+  if (pid)                         // CHECK-NEXT: 2: [[#@LINE]]:
+    wait(&status);                 // CHECK-NEXT: 1: [[#@LINE]]:
   func2();                         // CHECK-NEXT: 2: [[#@LINE]]:
   return 0;                        // CHECK-NEXT: 2: [[#@LINE]]:
 }
diff --git a/compiler-rt/test/profile/Posix/gcov-shared-flush.c b/compiler-rt/test/profile/Posix/gcov-shared-flush.c
index 97d44ad5204e1..494fb9be761db 100644
--- a/compiler-rt/test/profile/Posix/gcov-shared-flush.c
+++ b/compiler-rt/test/profile/Posix/gcov-shared-flush.c
@@ -7,7 +7,7 @@
 // RUN: %clang --coverage -fPIC -shared shared.c -o libfunc.so
 // RUN: test -f shared.gcno
 
-/// Test the case where we exit abruptly after calling __gcov_flush, which means we don't write out the counters at exit.
+/// Test the case where we exit abruptly after calling __gcov_dump, which means we don't write out the counters at exit.
 // RUN: %clang -DEXIT_ABRUPTLY -DSHARED_CALL_BEFORE_FLUSH -DSHARED_CALL_AFTER_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t
 // RUN: test -f gcov-shared-flush.gcno
 
@@ -21,7 +21,7 @@
 
 // SHARED: 1: {{[[0-9]+}}:void foo(int n)
 
-/// Test the case where we exit normally and we have a call to the shared library function before __gcov_flush.
+/// Test the case where we exit normally and we have a call to the shared library function before __gcov_dump.
 // RUN: %clang -DSHARED_CALL_BEFORE_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t
 // RUN: test -f gcov-shared-flush.gcno
 
@@ -32,14 +32,15 @@
 
 // BEFORE:      -: {{[0-9]+}}:#ifdef SHARED_CALL_BEFORE_FLUSH
 // BEFORE-NEXT: 1: {{[0-9]+}}:  foo(1);
-// BEFORE:      1: {{[0-9]+}}:  __gcov_flush();
+// BEFORE:      1: {{[0-9]+}}:  __gcov_dump();
+// BEFORE-NEXT: 1: {{[0-9]+}}:  __gcov_reset();
 // BEFORE:      -: {{[0-9]+}}:#ifdef SHARED_CALL_AFTER_FLUSH
 // BEFORE-NEXT: -: {{[0-9]+}}:  foo(1);
 // BEFORE:      1: {{[0-9]+}}:  bar(5);
 
 // SHARED_ONCE: 1: {{[0-9]+}}:void foo(int n)
 
-// # Test the case where we exit normally and we have a call to the shared library function after __gcov_flush.
+// # Test the case where we exit normally and we have a call to the shared library function after __gcov_dump.
 // RUN: %clang -DSHARED_CALL_AFTER_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t
 // RUN: test -f gcov-shared-flush.gcno
 
@@ -50,12 +51,13 @@
 
 // AFTER:      -: {{[0-9]+}}:#ifdef SHARED_CALL_BEFORE_FLUSH
 // AFTER-NEXT: -: {{[0-9]+}}:  foo(1);
-// AFTER:      1: {{[0-9]+}}:  __gcov_flush();
+// AFTER:      1: {{[0-9]+}}:  __gcov_dump();
+// AFTER-NEXT: 1: {{[0-9]+}}:  __gcov_reset();
 // AFTER:      -: {{[0-9]+}}:#ifdef SHARED_CALL_AFTER_FLUSH
 // AFTER-NEXT: 1: {{[0-9]+}}:  foo(1);
 // AFTER:      1: {{[0-9]+}}:  bar(5);
 
-// # Test the case where we exit normally and we have calls to the shared library function before and after __gcov_flush.
+// # Test the case where we exit normally and we have calls to the shared library function before and after __gcov_dump.
 // RUN: %clang -DSHARED_CALL_BEFORE_FLUSH -DSHARED_CALL_AFTER_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t
 // RUN: test -f gcov-shared-flush.gcno
 
@@ -66,7 +68,8 @@
 
 // BEFORE_AFTER:      -: {{[0-9]+}}:#ifdef SHARED_CALL_BEFORE_FLUSH
 // BEFORE_AFTER-NEXT: 1: {{[0-9]+}}:  foo(1);
-// BEFORE_AFTER:      1: {{[0-9]+}}:  __gcov_flush();
+// BEFORE_AFTER:      1: {{[0-9]+}}:  __gcov_dump();
+// BEFORE_AFTER-NEXT: 1: {{[0-9]+}}:  __gcov_reset();
 // BEFORE_AFTER:      -: {{[0-9]+}}:#ifdef SHARED_CALL_AFTER_FLUSH
 // BEFORE_AFTER-NEXT: 1: {{[0-9]+}}:  foo(1);
 // BEFORE_AFTER:      1: {{[0-9]+}}:  bar(5);
@@ -78,7 +81,8 @@ void foo(int n) {
 }
 #else
 extern void foo(int n);
-extern void __gcov_flush(void);
+extern void __gcov_dump(void);
+extern void __gcov_reset(void);
 
 int bar1 = 0;
 int bar2 = 1;
@@ -96,7 +100,8 @@ int main(int argc, char *argv[]) {
 #endif
 
   bar(5);
-  __gcov_flush();
+  __gcov_dump();
+  __gcov_reset();
   bar(5);
 
 #ifdef SHARED_CALL_AFTER_FLUSH
diff --git a/compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test b/compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test
deleted file mode 100644
index b845303a8afdc..0000000000000
--- a/compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test
+++ /dev/null
@@ -1,33 +0,0 @@
-# atexit(3) not supported in dlopen(3)ed+dlclose(3)d DSO
-XFAIL: netbsd
-
-RUN: mkdir -p %t.d
-RUN: cd %t.d
-
-RUN: %clang --coverage -o func.shared -fPIC -shared %S/../Inputs/instrprof-dlopen-func.c
-RUN: %clang --coverage -o func2.shared -fPIC -shared %S/../Inputs/instrprof-dlopen-func2.c
-RUN: %clang --coverage -o func3.shared -fPIC -shared %S/../Inputs/instrprof-dlopen-func3.c
-RUN: %clang --coverage -o %t -fPIC -rpath %t.d %S/../Inputs/instrprof-dlopen-dlclose-main.c
-
-# Test with two dlopened libraries.
-RUN: rm -f instrprof-dlopen-dlclose-main.gcda instrprof-dlopen-func.gcda instrprof-dlopen-func2.gcda
-RUN: %run %t
-RUN: llvm-cov gcov instrprof-dlopen-dlclose-main.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-dlclose-main.c.gcov %S/../Inputs/instrprof-dlopen-dlclose-main.c.gcov
-RUN: llvm-cov gcov instrprof-dlopen-func.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func.c.gcov %S/../Inputs/instrprof-dlopen-func.c.gcov
-RUN: llvm-cov gcov instrprof-dlopen-func2.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func2.c.gcov %S/../Inputs/instrprof-dlopen-func2.c.gcov
-
-# Test with three dlopened libraries.
-RUN: %clang -DUSE_LIB3 --coverage -o %t -fPIC -rpath %t.d %S/../Inputs/instrprof-dlopen-dlclose-main.c
-RUN: rm -f instrprof-dlopen-dlclose-main.gcda instrprof-dlopen-func.gcda instrprof-dlopen-func2.gcda instrprof-dlopen-func3.gcda
-RUN: %run %t
-RUN: llvm-cov gcov instrprof-dlopen-dlclose-main.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-dlclose-main.c.gcov %S/../Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov
-RUN: llvm-cov gcov instrprof-dlopen-func.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func.c.gcov %S/../Inputs/instrprof-dlopen-func.c.gcov
-RUN: llvm-cov gcov instrprof-dlopen-func2.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func2.c.gcov %S/../Inputs/instrprof-dlopen-func2.c.gcov
-RUN: llvm-cov gcov instrprof-dlopen-func3.gcda
-RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func2.c.gcov %S/../Inputs/instrprof-dlopen-func3.c.gcov
diff --git a/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c b/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c
index 5303e045063e8..649538dc2aaf1 100644
--- a/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c
+++ b/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c
@@ -10,11 +10,13 @@
 // CHECK:             -:    0:Runs:1
 // CHECK-NEXT:        -:    0:Programs:1
 
-void __gcov_flush(void);
+void __gcov_dump(void);
+void __gcov_reset(void);
 
 int main(void) {                   // CHECK:      1: [[#@LINE]]:int main(void)
   int i = 22;                      // CHECK-NEXT: 1: [[#@LINE]]:
-  __gcov_flush();                  // CHECK-NEXT: 1: [[#@LINE]]:
+  __gcov_dump();                   // CHECK-NEXT: 1: [[#@LINE]]:
+  __gcov_reset();                  // CHECK-NEXT: 1: [[#@LINE]]:
   i = 42;                          // CHECK-NEXT: 1: [[#@LINE]]:
   __builtin_trap();                // CHECK-NEXT: 1: [[#@LINE]]:
   i = 84;                          // CHECK-NEXT: 1: [[#@LINE]]:
diff --git a/compiler-rt/test/profile/gcov-dump-and-remove.c b/compiler-rt/test/profile/gcov-dump-and-remove.c
index 1dcf7b5bd5ca8..b7f80535aada3 100644
--- a/compiler-rt/test/profile/gcov-dump-and-remove.c
+++ b/compiler-rt/test/profile/gcov-dump-and-remove.c
@@ -8,16 +8,19 @@
 // RUN: rm -f gcov-dump-and-remove.gcda && %run %t
 // RUN: llvm-cov gcov -t gcov-dump-and-remove.gcda | FileCheck %s
 
-extern void __gcov_flush(void);
+extern void __gcov_dump(void);
+extern void __gcov_reset(void);
 extern int remove(const char *);   // CHECK:          -: [[#@LINE]]:extern int remove
 int main(void) {                   // CHECK-NEXT: #####: [[#@LINE]]:
-  __gcov_flush();                  // CHECK-NEXT: #####: [[#@LINE]]:
+  __gcov_dump();                   // CHECK-NEXT: #####: [[#@LINE]]:
+  __gcov_reset();                  // CHECK-NEXT: #####: [[#@LINE]]:
   if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: #####: [[#@LINE]]:
     return 1;                      // CHECK-NEXT: #####: [[#@LINE]]: return 1;
                                    // CHECK-NEXT:     -: [[#@LINE]]:
-  __gcov_flush();                  // CHECK-NEXT: #####: [[#@LINE]]:
-  __gcov_flush();                  // CHECK-NEXT: #####: [[#@LINE]]:
-  if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: #####: [[#@LINE]]:
+  __gcov_dump();                   // CHECK-NEXT:     1: [[#@LINE]]:
+  __gcov_reset();                  // CHECK-NEXT:     1: [[#@LINE]]:
+  __gcov_dump();                   // CHECK-NEXT:     1: [[#@LINE]]:
+  if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT:     1: [[#@LINE]]:
     return 1;                      // CHECK-NEXT: #####: [[#@LINE]]: return 1;
 
   return 0;
diff --git a/compiler-rt/test/profile/instrprof-lto-pgogen.c b/compiler-rt/test/profile/instrprof-lto-pgogen.c
new file mode 100644
index 0000000000000..99870c70bef37
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-lto-pgogen.c
@@ -0,0 +1,13 @@
+// REQUIRES: lto
+// XFAIL: msvc
+
+// RUN: %clang_pgogen=%t.profraw -flto %s -o %t
+// RUN: %run %t
+// RUN: llvm-profdata merge %t.profraw -o %t.profdata
+// RUN: llvm-profdata show %t.profdata | FileCheck %s
+
+// Testing a bug that happens when trying to generate IR
+// profile with BFD linker + LTO plugin
+
+// CHECK: Instrumentation level: IR
+int main() { return 0; }
diff --git a/compiler-rt/test/profile/instrprof-version-mismatch.c b/compiler-rt/test/profile/instrprof-version-mismatch.c
index c63b299c76d73..81ae52119693f 100644
--- a/compiler-rt/test/profile/instrprof-version-mismatch.c
+++ b/compiler-rt/test/profile/instrprof-version-mismatch.c
@@ -1,9 +1,6 @@
 // RUN: %clang_profgen -o %t -O3 %s
 // RUN: %run %t 1 2>&1 | FileCheck %s
 
-// FIXME: Weak symbols are once again a portability problem for Windows.
-// XFAIL: windows
-
 // override the version variable with a bogus version:
 unsigned long long __llvm_profile_raw_version = 10000;
 int main(int argc, const char *argv[]) {
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c
new file mode 100644
index 0000000000000..6999a80b638e5
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c
@@ -0,0 +1,20 @@
+// RUN: %clang %s -Wl,-as-needed -o %t && %run %t
+#include 
+#include 
+#include 
+
+long cpu_ns() {
+  clockid_t clk;
+  struct timespec ts;
+  int res = clock_getcpuclockid(getpid(), &clk);
+  assert(!res);
+  res = clock_gettime(clk, &ts);
+  assert(!res);
+  return ts.tv_nsec;
+}
+
+int main() {
+  long cpuns = cpu_ns();
+  asm volatile ("" :: "r"(cpuns));
+  return 0;
+}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp
index a1a93badf6b81..003790067d1b1 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp
@@ -1,10 +1,16 @@
-// RUN: %clangxx -std=c++11 -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx -std=c++11 -O0 -g %s -o %t
+// RUN: %clangxx -fno-sanitize=all -std=c++11 -O0 -g %s -o %t.nosan
+// RUN: diff <(%run %t 2>&1) <(%run %t.nosan 2>&1)
 // REQUIRES: !android
 
 #include 
 #include 
 #include 
 #include 
+#include 
+
+std::string any_name;
+int total_count;
 
 void print_protoent(protoent *curr_entry) {
   fprintf(stderr, "%s (%d)\n", curr_entry->p_name, curr_entry->p_proto);
@@ -21,6 +27,8 @@ void print_all_protoent() {
   protoent *curr_entry;
 
   while (getprotoent_r(&entry, buf, sizeof(buf), &curr_entry) != ENOENT && curr_entry) {
+    ++total_count;
+    any_name = curr_entry->p_name;
     print_protoent(curr_entry);
   }
 }
@@ -46,24 +54,16 @@ void print_protoent_by_num(int num) {
 }
 
 int main() {
-  // CHECK: All protoent
-  // CHECK: ip (0)
-  // CHECK-NEXT: alias IP
-  // CHECK: ipv6 (41)
-  // CHECK-NEXT: alias IPv6
   fprintf(stderr, "All protoent\n");
   print_all_protoent();
 
-  // CHECK: Protoent by name
-  // CHECK-NEXT: ipv6 (41)
-  // CHECK-NEXT: alias IPv6
+  if (!total_count)
+    return 0;
+
   fprintf(stderr, "Protoent by name\n");
-  print_protoent_by_name("ipv6");
+  print_protoent_by_name(any_name.c_str());
 
-  // CHECK: Protoent by num
-  // CHECK-NEXT: udp (17)
-  // CHECK-NEXT: alias UDP
   fprintf(stderr, "Protoent by num\n");
-  print_protoent_by_num(17);
+  print_protoent_by_num(total_count / 2);
   return 0;
 }
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp
new file mode 100644
index 0000000000000..84084b9291a70
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp
@@ -0,0 +1,78 @@
+// RUN: %clangxx -std=c++11 -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// sigandset is glibc specific.
+// UNSUPPORTED: android, freebsd, netbsd
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+sigset_t mkset(int n, ...) {
+  sigset_t s;
+  int res = 0;
+  res |= sigemptyset(&s);
+  va_list va;
+  va_start(va, n);
+  while (n--) {
+    res |= sigaddset(&s, va_arg(va, int));
+  }
+  va_end(va);
+  assert(!res);
+  return s;
+}
+
+sigset_t sigset_or(sigset_t first, sigset_t second) {
+  sigset_t out;
+  int res = sigorset(&out, &first, &second);
+  assert(!res);
+  return out;
+}
+
+sigset_t sigset_and(sigset_t first, sigset_t second) {
+  sigset_t out;
+  int res = sigandset(&out, &first, &second);
+  assert(!res);
+  return out;
+}
+
+int fork_and_signal(sigset_t s) {
+  if (pid_t pid = fork()) {
+    kill(pid, SIGUSR1);
+    kill(pid, SIGUSR2);
+    int child_stat;
+    wait(&child_stat);
+    return !WIFEXITED(child_stat);
+  } else {
+    int sig;
+    int res = sigwait(&s, &sig);
+    assert(!res);
+    fprintf(stderr, "died with sig %d\n", sig);
+    _exit(0);
+  }
+}
+
+void test_sigwait() {
+  // test sigorset... s should now contain SIGUSR1 | SIGUSR2
+  sigset_t s = sigset_or(mkset(1, SIGUSR1), mkset(1, SIGUSR2));
+  sigprocmask(SIG_BLOCK, &s, 0);
+  int res;
+  res = fork_and_signal(s);
+  fprintf(stderr, "fork_and_signal with SIGUSR1,2: %d\n", res);
+  // CHECK: died with sig 10
+  // CHECK: fork_and_signal with SIGUSR1,2: 0
+
+  // test sigandset... s should only have SIGUSR2 now
+  s = sigset_and(s, mkset(1, SIGUSR2));
+  res = fork_and_signal(s);
+  fprintf(stderr, "fork_and_signal with SIGUSR2: %d\n", res);
+  // CHECK: died with sig 12
+  // CHECK: fork_and_signal with SIGUSR2: 0
+}
+
+int main(void) {
+  test_sigwait();
+  return 0;
+}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c b/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c
index c28eb65b7d4f0..d08af1b3565fd 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c
@@ -3,16 +3,16 @@
 
 #include 
 #include 
-#include 
+#include 
 
 int main(int argc, char **argv) {
   char q[10];
   size_t n = strxfrm(q, "abcdef", sizeof(q));
   assert(n < sizeof(q));
 
-  char q2[10];
+  char q2[100];
   locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0);
-  n = strxfrm_l(q2, L"qwerty", sizeof(q), loc);
+  n = strxfrm_l(q2, "qwerty", sizeof(q2), loc);
   assert(n < sizeof(q2));
 
   freelocale(loc);
diff --git a/compiler-rt/test/sanitizer_common/android_commands/android_compile.py b/compiler-rt/test/sanitizer_common/android_commands/android_compile.py
index 4b880886b0c1e..a57bc311bd522 100755
--- a/compiler-rt/test/sanitizer_common/android_commands/android_compile.py
+++ b/compiler-rt/test/sanitizer_common/android_commands/android_compile.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import os, sys, subprocess
 from android_common import *
diff --git a/compiler-rt/test/sanitizer_common/android_commands/android_run.py b/compiler-rt/test/sanitizer_common/android_commands/android_run.py
index 8a97aa5f7b1b3..41a587cb404c5 100755
--- a/compiler-rt/test/sanitizer_common/android_commands/android_run.py
+++ b/compiler-rt/test/sanitizer_common/android_commands/android_run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import os, signal, sys, subprocess, tempfile
 from android_common import *
diff --git a/compiler-rt/test/sanitizer_common/ios_commands/iossim_compile.py b/compiler-rt/test/sanitizer_common/ios_commands/iossim_compile.py
index 8fa480ed5f601..e4c50d7949a29 100755
--- a/compiler-rt/test/sanitizer_common/ios_commands/iossim_compile.py
+++ b/compiler-rt/test/sanitizer_common/ios_commands/iossim_compile.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import os, sys, subprocess
 
diff --git a/compiler-rt/test/sanitizer_common/ios_commands/iossim_env.py b/compiler-rt/test/sanitizer_common/ios_commands/iossim_env.py
index 28f626900f0bf..78d197483df1b 100755
--- a/compiler-rt/test/sanitizer_common/ios_commands/iossim_env.py
+++ b/compiler-rt/test/sanitizer_common/ios_commands/iossim_env.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import os, sys, subprocess
 
diff --git a/compiler-rt/test/sanitizer_common/ios_commands/iossim_prepare.py b/compiler-rt/test/sanitizer_common/ios_commands/iossim_prepare.py
index ad1b922778757..cbcc6029fc1a2 100755
--- a/compiler-rt/test/sanitizer_common/ios_commands/iossim_prepare.py
+++ b/compiler-rt/test/sanitizer_common/ios_commands/iossim_prepare.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import json
 
diff --git a/compiler-rt/test/sanitizer_common/ios_commands/iossim_run.py b/compiler-rt/test/sanitizer_common/ios_commands/iossim_run.py
index 8af3eec441f2f..5ad15af3a33e1 100755
--- a/compiler-rt/test/sanitizer_common/ios_commands/iossim_run.py
+++ b/compiler-rt/test/sanitizer_common/ios_commands/iossim_run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import glob, os, pipes, sys, subprocess
 
diff --git a/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m b/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m
new file mode 100644
index 0000000000000..f502e5f535372
--- /dev/null
+++ b/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m
@@ -0,0 +1,27 @@
+// REQUIRES: darwin
+//
+// RUN: %clang -framework Foundation -fsanitize=objc-cast %s -O1 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+//
+// RUN: %clang -framework Foundation -fsanitize=objc-cast -fno-sanitize-recover=objc-cast %s -O1 -o %t.trap
+// RUN: not %run %t.trap 2>&1 | FileCheck %s
+
+#include 
+
+int main() {
+  NSArray *arrayOfInt = [NSArray arrayWithObjects:@1, @2, @3, (void *)0];
+  // CHECK: objc-cast.m:[[@LINE+1]]:{{.*}}: runtime error: invalid ObjC cast, object is a '__NSCFNumber', but expected a 'NSString'
+  for (NSString *str in arrayOfInt) {
+    NSLog(@"%@", str);
+  }
+
+  NSArray *arrayOfStr = [NSArray arrayWithObjects:@"a", @"b", @"c", (void *)0];
+  for (NSString *str in arrayOfStr) {
+    NSLog(@"%@", str);
+  }
+
+  // The diagnostic should only be printed once.
+  // CHECK-NOT: runtime error
+
+  return 0;
+}
diff --git a/debuginfo-tests/dexter/dexter.py b/debuginfo-tests/dexter/dexter.py
index 8190a4b4e22ed..49ba85db43d9c 100755
--- a/debuginfo-tests/dexter/dexter.py
+++ b/debuginfo-tests/dexter/dexter.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # DExTer : Debugging Experience Tester
 # ~~~~~~   ~         ~~         ~   ~~
 #
diff --git a/debuginfo-tests/llgdb-tests/llgdb.py b/debuginfo-tests/llgdb-tests/llgdb.py
index 5f14497f628f1..83b5ec686de73 100755
--- a/debuginfo-tests/llgdb-tests/llgdb.py
+++ b/debuginfo-tests/llgdb-tests/llgdb.py
@@ -1,4 +1,4 @@
-#!/bin/env python
+#!/usr/bin/env python
 """
 A gdb-compatible frontend for lldb that implements just enough
 commands to run the tests in the debuginfo-tests repository with lldb.
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index feb9544209bf0..13e675f1096e5 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -37,13 +37,7 @@ endif()
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
 include(AddFlang)
 
-if (MSVC)
-  set(_FLANG_ENABLE_WERROR_DEFAULT OFF)
-else ()
-  set(_FLANG_ENABLE_WERROR_DEFAULT ON)
-endif()
-option(FLANG_ENABLE_WERROR "Fail and stop building flang if a warning is triggered."
-       "${_FLANG_ENABLE_WERROR_DEFAULT}")
+option(FLANG_ENABLE_WERROR "Fail and stop building flang if a warning is triggered." OFF)
 
 # Check for a standalone build and configure as appropriate from
 # there.
@@ -305,9 +299,6 @@ if (FLANG_ENABLE_WERROR)
     append("-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
     append("-Wno-error" CMAKE_REQUIRED_FLAGS)
   endif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
-  if (NOT LLVM_ENABLE_WERROR)
-      message(WARNING "FLANG_ENABLE_WERROR setting is different from LLVM_ENABLE_WERROR.")
-  endif()
 endif()
 
 # Builtin check_cxx_compiler_flag doesn't seem to work correctly
diff --git a/flang/include/flang/Common/Fortran-features.h b/flang/include/flang/Common/Fortran-features.h
index 823fa85ad12e2..613aa69cc5d61 100644
--- a/flang/include/flang/Common/Fortran-features.h
+++ b/flang/include/flang/Common/Fortran-features.h
@@ -24,7 +24,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
     OldStyleParameter, ComplexConstructor, PercentLOC, SignedPrimary, FileName,
     Convert, Dispose, IOListLeadingComma, AbbreviatedEditDescriptor,
     ProgramParentheses, PercentRefAndVal, OmitFunctionDummies, CrayPointer,
-    Hollerith, ArithmeticIF, Assign, AssignedGOTO, Pause, OpenMP,
+    Hollerith, ArithmeticIF, Assign, AssignedGOTO, Pause, OpenACC, OpenMP,
     CruftAfterAmpersand, ClassicCComments, AdditionalFormats, BigIntLiterals,
     RealDoControls, EquivalenceNumericWithCharacter, AdditionalIntrinsics,
     AnonymousParents, OldLabelDoEndStatements, LogicalIntegerAssignment,
@@ -37,6 +37,7 @@ class LanguageFeatureControl {
   LanguageFeatureControl() {
     // These features must be explicitly enabled by command line options.
     disable_.set(LanguageFeature::OldDebugLines);
+    disable_.set(LanguageFeature::OpenACC);
     disable_.set(LanguageFeature::OpenMP);
     // These features, if enabled, conflict with valid standard usage,
     // so there are disabled here by default.
@@ -50,7 +51,9 @@ class LanguageFeatureControl {
   void WarnOnAllNonstandard(bool yes = true) { warnAll_ = yes; }
   bool IsEnabled(LanguageFeature f) const { return !disable_.test(f); }
   bool ShouldWarn(LanguageFeature f) const {
-    return (warnAll_ && f != LanguageFeature::OpenMP) || warn_.test(f);
+    return (warnAll_ && f != LanguageFeature::OpenMP &&
+               f != LanguageFeature::OpenACC) ||
+        warn_.test(f);
   }
   // Return all spellings of operators names, depending on features enabled
   std::vector GetNames(LogicalOperator) const;
diff --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h
index 0bc9deb08f4cd..fa687e92d35b4 100644
--- a/flang/include/flang/Decimal/decimal.h
+++ b/flang/include/flang/Decimal/decimal.h
@@ -69,7 +69,7 @@ enum DecimalConversionFlags {
  * some extra due to the library working internally in base 10**16
  * and computing its output size in multiples of 16.
  */
-#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 16 - 1)
+#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 2 * 16 - 1)
 
 #ifdef __cplusplus
 template 
diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 0b273a6aa7340..13dd43b60fded 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -5,6 +5,10 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
 
 #ifndef FORTRAN_LOWER_OPENMP_H
 #define FORTRAN_LOWER_OPENMP_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIRDialect.h b/flang/include/flang/Optimizer/Dialect/FIRDialect.h
index 963dad8a09c4b..1c06fe5ab0609 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRDialect.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRDialect.h
@@ -38,6 +38,7 @@ inline void registerFIR() {
   [[maybe_unused]] static bool init_once = [] {
     mlir::registerDialect();
     mlir::registerDialect();
+    mlir::registerDialect();
     mlir::registerDialect();
     mlir::registerDialect();
     mlir::registerDialect();
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 58bf38aa8a4bc..0bc543882a268 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -1853,7 +1853,9 @@ def fir_LenParamIndexOp : fir_OneResultOp<"len_param_index", [NoSideEffect]> {
 // Fortran loops
 //===----------------------------------------------------------------------===//
 
-def fir_ResultOp : fir_Op<"result", [NoSideEffect, ReturnLike, Terminator]> {
+def fir_ResultOp : fir_Op<"result",
+    [NoSideEffect, ReturnLike, Terminator,
+     ParentOneOf<["WhereOp", "LoopOp", "IterWhileOp"]>]> {
   let summary = "special terminator for use in fir region operations";
 
   let description = [{
@@ -1970,7 +1972,7 @@ def fir_LoopOp : region_Op<"do_loop",
   }];
 }
 
-def fir_WhereOp : region_Op<"if"> {
+def fir_WhereOp : region_Op<"if", [NoRegionArguments]> {
   let summary = "if-then-else conditional operation";
   let description = [{
     Used to conditionally execute operations. This operation is the FIR
diff --git a/flang/include/flang/Parser/char-buffer.h b/flang/include/flang/Parser/char-buffer.h
index e61a3fe3427e4..1879e1960c381 100644
--- a/flang/include/flang/Parser/char-buffer.h
+++ b/flang/include/flang/Parser/char-buffer.h
@@ -58,9 +58,6 @@ class CharBuffer {
 
   std::string Marshal() const;
 
-  // Removes carriage returns ('\r') and ensures a final line feed ('\n').
-  std::string MarshalNormalized() const;
-
 private:
   struct Block {
     static constexpr std::size_t capacity{1 << 20};
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 59333c7405ffa..02da3f53b44e0 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -53,6 +53,88 @@ class ParseTreeDumper {
   NODE(format, IntrinsicTypeDataEditDesc)
   NODE(format::IntrinsicTypeDataEditDesc, Kind)
   NODE(parser, Abstract)
+  NODE(parser, AccAtomicCapture)
+  NODE(AccAtomicCapture, Stmt1)
+  NODE(AccAtomicCapture, Stmt2)
+  NODE(parser, AccAtomicRead)
+  NODE(parser, AccAtomicUpdate)
+  NODE(parser, AccAtomicWrite)
+  NODE(parser, AccBeginBlockDirective)
+  NODE(parser, AccBeginCombinedDirective)
+  NODE(parser, AccBeginLoopDirective)
+  NODE(parser, AccBlockDirective)
+  NODE(parser, AccClause)
+  NODE(AccClause, Auto)
+  NODE(AccClause, Async)
+  NODE(AccClause, Attach)
+  NODE(AccClause, Bind)
+  NODE(AccClause, Capture)
+  NODE(AccClause, Collapse)
+  NODE(AccClause, Copy)
+  NODE(AccClause, Copyin)
+  NODE(AccClause, Copyout)
+  NODE(AccClause, Create)
+  NODE(AccClause, Default)
+  NODE(AccClause, DefaultAsync)
+  NODE(AccClause, Delete)
+  NODE(AccClause, Detach)
+  NODE(AccClause, Device)
+  NODE(AccClause, DeviceNum)
+  NODE(AccClause, DevicePtr)
+  NODE(AccClause, DeviceResident)
+  NODE(AccClause, DeviceType)
+  NODE(AccClause, Finalize)
+  NODE(AccClause, FirstPrivate)
+  NODE(AccClause, Gang)
+  NODE(AccClause, Host)
+  NODE(AccClause, If)
+  NODE(AccClause, IfPresent)
+  NODE(AccClause, Independent)
+  NODE(AccClause, Link)
+  NODE(AccClause, NoCreate)
+  NODE(AccClause, NoHost)
+  NODE(AccClause, NumGangs)
+  NODE(AccClause, NumWorkers)
+  NODE(AccClause, Present)
+  NODE(AccClause, Private)
+  NODE(AccClause, Tile)
+  NODE(AccClause, UseDevice)
+  NODE(AccClause, Read)
+  NODE(AccClause, Reduction)
+  NODE(AccClause, Self)
+  NODE(AccClause, Seq)
+  NODE(AccClause, Vector)
+  NODE(AccClause, VectorLength)
+  NODE(AccClause, Wait)
+  NODE(AccClause, Worker)
+  NODE(AccClause, Write)
+  NODE(AccClause, Unknown)
+  NODE(parser, AccDefaultClause)
+  NODE_ENUM(parser::AccDefaultClause, Arg)
+  NODE(parser, AccClauseList)
+  NODE(parser, AccCombinedDirective)
+  NODE(parser, AccDataModifier)
+  NODE_ENUM(parser::AccDataModifier, Modifier)
+  NODE(parser, AccDeclarativeDirective)
+  NODE(parser, AccEndAtomic)
+  NODE(parser, AccEndBlockDirective)
+  NODE(parser, AccEndCombinedDirective)
+  NODE(parser, AccGangArgument)
+  NODE(parser, AccObject)
+  NODE(parser, AccObjectList)
+  NODE(parser, AccObjectListWithModifier)
+  NODE(parser, AccObjectListWithReduction)
+  NODE(parser, AccReductionOperator)
+  NODE(parser, AccSizeExpr)
+  NODE(parser, AccSizeExprList)
+  NODE(parser, AccStandaloneDirective)
+  NODE(parser, AccLoopDirective)
+  NODE(parser, AccWaitArgument)
+  static std::string GetNodeName(const llvm::acc::Directive &x) {
+    return llvm::Twine(
+        "llvm::acc::Directive = ", llvm::acc::getOpenACCDirectiveName(x))
+        .str();
+  }
   NODE(parser, AcImpliedDo)
   NODE(parser, AcImpliedDoControl)
   NODE(parser, AcValue)
@@ -140,6 +222,7 @@ class ParseTreeDumper {
   NODE(CommonStmt, Block)
   NODE(parser, CompilerDirective)
   NODE(CompilerDirective, IgnoreTKR)
+  NODE(CompilerDirective, NameValue)
   NODE(parser, ComplexLiteralConstant)
   NODE(parser, ComplexPart)
   NODE(parser, ComponentArraySpec)
@@ -510,6 +593,17 @@ class ParseTreeDumper {
   NODE(parser, OmpSectionsDirective)
   NODE(parser, OmpSimpleStandaloneDirective)
   NODE(parser, Only)
+  NODE(parser, OpenACCAtomicConstruct)
+  NODE(parser, OpenACCBlockConstruct)
+  NODE(parser, OpenACCCacheConstruct)
+  NODE(parser, OpenACCCombinedConstruct)
+  NODE(parser, OpenACCConstruct)
+  NODE(parser, OpenACCDeclarativeConstruct)
+  NODE(parser, OpenACCLoopConstruct)
+  NODE(parser, OpenACCRoutineConstruct)
+  NODE(parser, OpenACCStandaloneDeclarativeConstruct)
+  NODE(parser, OpenACCStandaloneConstruct)
+  NODE(parser, OpenACCWaitConstruct)
   NODE(parser, OpenMPAtomicConstruct)
   NODE(parser, OpenMPBlockConstruct)
   NODE(parser, OpenMPCancelConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 67fd5741b0975..4b34d2cd674cc 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -25,6 +25,7 @@
 #include "flang/Common/Fortran.h"
 #include "flang/Common/idioms.h"
 #include "flang/Common/indirection.h"
+#include "llvm/Frontend/OpenACC/ACC.h.inc"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include 
 #include 
@@ -256,6 +257,8 @@ struct ArithmeticIfStmt;
 struct AssignStmt;
 struct AssignedGotoStmt;
 struct PauseStmt;
+struct OpenACCConstruct;
+struct OpenACCDeclarativeConstruct;
 struct OpenMPConstruct;
 struct OpenMPDeclarativeConstruct;
 struct OmpEndLoopDirective;
@@ -386,6 +389,7 @@ struct SpecificationConstruct {
       Statement,
       Statement>,
       common::Indirection,
+      common::Indirection,
       common::Indirection,
       common::Indirection>
       u;
@@ -424,7 +428,8 @@ struct DeclarationConstruct {
 // from the implicit part to the declaration constructs
 struct SpecificationPart {
   TUPLE_CLASS_BOILERPLATE(SpecificationPart);
-  std::tuple,
+  std::tuple,
+      std::list,
       std::list>>,
       std::list>>, ImplicitPart,
       std::list>
@@ -509,6 +514,7 @@ struct ExecutableConstruct {
       common::Indirection,
       common::Indirection, common::Indirection,
       common::Indirection,
+      common::Indirection,
       common::Indirection,
       common::Indirection>
       u;
@@ -3205,8 +3211,12 @@ struct CompilerDirective {
     TUPLE_CLASS_BOILERPLATE(IgnoreTKR);
     std::tuple, Name> t;
   };
+  struct NameValue {
+    TUPLE_CLASS_BOILERPLATE(NameValue);
+    std::tuple> t;
+  };
   CharBlock source;
-  std::variant, std::list> u;
+  std::variant, std::list> u;
 };
 
 // Legacy extensions
@@ -3789,5 +3799,287 @@ struct OpenMPConstruct {
       OpenMPCriticalConstruct>
       u;
 };
+
+// Parse tree nodes for OpenACC 3.0 directives and clauses
+
+struct AccObject {
+  UNION_CLASS_BOILERPLATE(AccObject);
+  std::variant u;
+};
+
+WRAPPER_CLASS(AccObjectList, std::list);
+
+// OpenACC directive beginning or ending a block
+struct AccBlockDirective {
+  WRAPPER_CLASS_BOILERPLATE(AccBlockDirective, llvm::acc::Directive);
+  CharBlock source;
+};
+
+struct AccLoopDirective {
+  WRAPPER_CLASS_BOILERPLATE(AccLoopDirective, llvm::acc::Directive);
+  CharBlock source;
+};
+
+struct AccStandaloneDirective {
+  WRAPPER_CLASS_BOILERPLATE(AccStandaloneDirective, llvm::acc::Directive);
+  CharBlock source;
+};
+
+// 2.11 Combined constructs
+struct AccCombinedDirective {
+  WRAPPER_CLASS_BOILERPLATE(AccCombinedDirective, llvm::acc::Directive);
+  CharBlock source;
+};
+
+struct AccDeclarativeDirective {
+  WRAPPER_CLASS_BOILERPLATE(AccDeclarativeDirective, llvm::acc::Directive);
+  CharBlock source;
+};
+
+// OpenACC Clauses
+struct AccDefaultClause {
+  ENUM_CLASS(Arg, None, Present)
+  WRAPPER_CLASS_BOILERPLATE(AccDefaultClause, Arg);
+  CharBlock source;
+};
+
+struct AccDataModifier {
+  ENUM_CLASS(Modifier, ReadOnly, Zero)
+  WRAPPER_CLASS_BOILERPLATE(AccDataModifier, Modifier);
+  CharBlock source;
+};
+
+struct AccObjectListWithModifier {
+  TUPLE_CLASS_BOILERPLATE(AccObjectListWithModifier);
+  std::tuple, AccObjectList> t;
+};
+
+// 2.5.13: + | * | max | min | iand | ior | ieor | .and. | .or. | .eqv. | .neqv.
+struct AccReductionOperator {
+  UNION_CLASS_BOILERPLATE(AccReductionOperator);
+  std::variant u;
+};
+
+struct AccObjectListWithReduction {
+  TUPLE_CLASS_BOILERPLATE(AccObjectListWithReduction);
+  std::tuple t;
+};
+
+struct AccWaitArgument {
+  TUPLE_CLASS_BOILERPLATE(AccWaitArgument);
+  std::tuple, std::list> t;
+};
+
+struct AccSizeExpr {
+  TUPLE_CLASS_BOILERPLATE(AccSizeExpr);
+  CharBlock source;
+  std::tuple> t; // if null then *
+};
+
+struct AccSizeExprList {
+  WRAPPER_CLASS_BOILERPLATE(AccSizeExprList, std::list);
+};
+
+struct AccGangArgument {
+  TUPLE_CLASS_BOILERPLATE(AccGangArgument);
+  std::tuple, std::optional> t;
+};
+
+struct AccClause {
+  UNION_CLASS_BOILERPLATE(AccClause);
+
+  EMPTY_CLASS(Auto);
+  WRAPPER_CLASS(Async, std::optional);
+  WRAPPER_CLASS(Attach, AccObjectList);
+  WRAPPER_CLASS(Bind, Name);
+  EMPTY_CLASS(Capture);
+  WRAPPER_CLASS(Collapse, ScalarIntConstantExpr);
+  WRAPPER_CLASS(Copy, AccObjectList);
+  WRAPPER_CLASS(Copyin, AccObjectListWithModifier);
+  WRAPPER_CLASS(Copyout, AccObjectListWithModifier);
+  WRAPPER_CLASS(Create, AccObjectListWithModifier);
+  WRAPPER_CLASS(Default, AccDefaultClause);
+  WRAPPER_CLASS(DefaultAsync, ScalarIntExpr);
+  WRAPPER_CLASS(Delete, AccObjectList);
+  WRAPPER_CLASS(Detach, AccObjectList);
+  WRAPPER_CLASS(Device, AccObjectList);
+  WRAPPER_CLASS(DeviceNum, ScalarIntConstantExpr);
+  WRAPPER_CLASS(DevicePtr, AccObjectList);
+  WRAPPER_CLASS(DeviceResident, AccObjectList);
+  WRAPPER_CLASS(DeviceType, std::optional>);
+  EMPTY_CLASS(Finalize);
+  WRAPPER_CLASS(FirstPrivate, AccObjectList);
+  WRAPPER_CLASS(Gang, std::optional);
+  WRAPPER_CLASS(Host, AccObjectList);
+  WRAPPER_CLASS(If, ScalarLogicalExpr);
+  EMPTY_CLASS(IfPresent);
+  EMPTY_CLASS(Independent);
+  WRAPPER_CLASS(Link, AccObjectList);
+  WRAPPER_CLASS(NoCreate, AccObjectList);
+  EMPTY_CLASS(NoHost);
+  WRAPPER_CLASS(NumGangs, ScalarIntExpr);
+  WRAPPER_CLASS(NumWorkers, ScalarIntExpr);
+  WRAPPER_CLASS(Present, AccObjectList);
+  WRAPPER_CLASS(Private, AccObjectList);
+  WRAPPER_CLASS(Tile, AccSizeExprList);
+  WRAPPER_CLASS(UseDevice, AccObjectList);
+  EMPTY_CLASS(Read);
+  WRAPPER_CLASS(Reduction, AccObjectListWithReduction);
+  WRAPPER_CLASS(Self, std::optional);
+  EMPTY_CLASS(Seq);
+  WRAPPER_CLASS(Vector, std::optional);
+  WRAPPER_CLASS(VectorLength, ScalarIntExpr);
+  WRAPPER_CLASS(Wait, std::optional);
+  WRAPPER_CLASS(Worker, std::optional);
+  EMPTY_CLASS(Write);
+  EMPTY_CLASS(Unknown);
+
+  CharBlock source;
+
+  std::variant
+      u;
+};
+
+struct AccClauseList {
+  WRAPPER_CLASS_BOILERPLATE(AccClauseList, std::list);
+  CharBlock source;
+};
+
+struct OpenACCRoutineConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCRoutineConstruct);
+  CharBlock source;
+  std::tuple, AccClauseList> t;
+};
+
+struct OpenACCCacheConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCCacheConstruct);
+  CharBlock source;
+  std::tuple t;
+};
+
+struct OpenACCWaitConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCWaitConstruct);
+  CharBlock source;
+  std::tuple, AccClauseList> t;
+};
+
+struct AccBeginLoopDirective {
+  TUPLE_CLASS_BOILERPLATE(AccBeginLoopDirective);
+  std::tuple t;
+  CharBlock source;
+};
+
+struct AccBeginBlockDirective {
+  TUPLE_CLASS_BOILERPLATE(AccBeginBlockDirective);
+  CharBlock source;
+  std::tuple t;
+};
+
+struct AccEndBlockDirective {
+  CharBlock source;
+  WRAPPER_CLASS_BOILERPLATE(AccEndBlockDirective, AccBlockDirective);
+};
+
+// ACC END ATOMIC
+EMPTY_CLASS(AccEndAtomic);
+
+// ACC ATOMIC READ
+struct AccAtomicRead {
+  TUPLE_CLASS_BOILERPLATE(AccAtomicRead);
+  std::tuple, std::optional>
+      t;
+};
+
+// ACC ATOMIC WRITE
+struct AccAtomicWrite {
+  TUPLE_CLASS_BOILERPLATE(AccAtomicWrite);
+  std::tuple, std::optional>
+      t;
+};
+
+// ACC ATOMIC UPDATE
+struct AccAtomicUpdate {
+  TUPLE_CLASS_BOILERPLATE(AccAtomicUpdate);
+  std::tuple, Statement,
+      std::optional>
+      t;
+};
+
+// ACC ATOMIC CAPTURE
+struct AccAtomicCapture {
+  TUPLE_CLASS_BOILERPLATE(AccAtomicCapture);
+  WRAPPER_CLASS(Stmt1, Statement);
+  WRAPPER_CLASS(Stmt2, Statement);
+  std::tuple t;
+};
+
+struct OpenACCAtomicConstruct {
+  UNION_CLASS_BOILERPLATE(OpenACCAtomicConstruct);
+  std::variant
+      u;
+};
+
+struct OpenACCBlockConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCBlockConstruct);
+  std::tuple t;
+};
+
+struct OpenACCStandaloneDeclarativeConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCStandaloneDeclarativeConstruct);
+  CharBlock source;
+  std::tuple t;
+};
+
+struct AccBeginCombinedDirective {
+  TUPLE_CLASS_BOILERPLATE(AccBeginCombinedDirective);
+  std::tuple t;
+};
+
+struct AccEndCombinedDirective {
+  WRAPPER_CLASS_BOILERPLATE(AccEndCombinedDirective, AccCombinedDirective);
+  CharBlock source;
+};
+
+struct OpenACCCombinedConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCCombinedConstruct);
+  CharBlock source;
+  std::tuple>
+      t;
+};
+
+struct OpenACCDeclarativeConstruct {
+  UNION_CLASS_BOILERPLATE(OpenACCDeclarativeConstruct);
+  CharBlock source;
+  std::variant u;
+};
+
+// OpenACC directives enclosing do loop
+struct OpenACCLoopConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCLoopConstruct);
+  OpenACCLoopConstruct(AccBeginLoopDirective &&a)
+      : t({std::move(a), std::nullopt}) {}
+  std::tuple> t;
+};
+
+struct OpenACCStandaloneConstruct {
+  TUPLE_CLASS_BOILERPLATE(OpenACCStandaloneConstruct);
+  CharBlock source;
+  std::tuple t;
+};
+
+struct OpenACCConstruct {
+  UNION_CLASS_BOILERPLATE(OpenACCConstruct);
+  std::variant
+      u;
+};
+
 } // namespace Fortran::parser
 #endif // FORTRAN_PARSER_PARSE_TREE_H_
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 53bd9d7249145..2fbb777104d6d 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -222,15 +222,46 @@ template  class BigRadixFloatingPointNumber {
     return remainder;
   }
 
-  int DivideByPowerOfTwo(int twoPow) { // twoPow <= LOG10RADIX
-    int remainder{0};
+  void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
+    Digit remainder{0};
+    auto mask{(Digit{1} << twoPow) - 1};
+    auto coeff{radix >> twoPow};
     for (int j{digits_ - 1}; j >= 0; --j) {
-      Digit q{digit_[j] >> twoPow};
-      int nrem = digit_[j] - (q << twoPow);
-      digit_[j] = q + (radix >> twoPow) * remainder;
+      auto nrem{digit_[j] & mask};
+      digit_[j] = (digit_[j] >> twoPow) + coeff * remainder;
       remainder = nrem;
     }
-    return remainder;
+  }
+
+  // Returns true on overflow
+  bool DivideByPowerOfTwoInPlace(int twoPow) {
+    if (digits_ > 0) {
+      while (twoPow > 0) {
+        int chunk{twoPow > log10Radix ? log10Radix : twoPow};
+        if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) {
+          DivideByPowerOfTwo(chunk);
+          twoPow -= chunk;
+          continue;
+        }
+        twoPow -= chunk;
+        if (digit_[digits_ - 1] >> chunk != 0) {
+          if (digits_ == digitLimit_) {
+            return true; // overflow
+          }
+          digit_[digits_++] = 0;
+        }
+        auto remainder{digit_[digits_ - 1]};
+        exponent_ -= log10Radix;
+        auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix
+        auto mask{(Digit{1} << chunk) - 1};
+        for (int j{digits_ - 1}; j >= 1; --j) {
+          digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder;
+          remainder = digit_[j - 1] & mask;
+        }
+        digit_[0] = coeff * remainder;
+      }
+    }
+    return false; // no overflow
   }
 
   int AddCarry(int position = 0, int carry = 1) {
diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp
index ad30b4d854033..bcc0f08558aa4 100644
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -70,42 +70,8 @@ BigRadixFloatingPointNumber::BigRadixFloatingPointNumber(
     overflow |= MultiplyBy<2>();
   }
 
-  while (twoPow < 0) {
-    int shift{common::TrailingZeroBitCount(digit_[0])};
-    if (shift == 0) {
-      break;
-    }
-    if (shift > log10Radix) {
-      shift = log10Radix;
-    }
-    if (shift > -twoPow) {
-      shift = -twoPow;
-    }
-    // (D*(2**S)) * 10.**E * 2.**twoPow -> D * 10.**E * 2.**(twoPow+S)
-    DivideByPowerOfTwo(shift);
-    twoPow += shift;
-  }
-
-  for (; twoPow <= -4; twoPow += 4) {
-    // D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4)
-    overflow |= MultiplyBy<(5 * 5 * 5 * 5)>();
-    exponent_ -= 4;
-  }
-  if (twoPow <= -2) {
-    // D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2)
-    overflow |= MultiplyBy<5 * 5>();
-    twoPow += 2;
-    exponent_ -= 2;
-  }
-  for (; twoPow < 0; ++twoPow) {
-    // D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1)
-    overflow |= MultiplyBy<5>();
-    --exponent_;
-  }
-
+  overflow |= DivideByPowerOfTwoInPlace(-twoPow);
   assert(overflow == 0);
-
-  // twoPow == 0, the decimal encoding is complete.
   Normalize();
 }
 
@@ -153,7 +119,7 @@ BigRadixFloatingPointNumber::ConvertToDecimal(char *buffer,
     for (int k{0}; k < log10Radix; k += 2) {
       Digit d{common::DivideUnsignedBy(dig)};
       dig = 100 * (dig - d * hundredth);
-      const char *q = lut + 2 * d;
+      const char *q{lut + 2 * d};
       *p++ = q[0];
       *p++ = q[1];
     }
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index 3cd71c007a00a..975065c9ed7de 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -7,6 +7,7 @@ add_flang_library(FortranLower
   Coarray.cpp
   ComplexExpr.cpp
   ConvertType.cpp
+  ConvertExpr.cpp
   DoLoopHelper.cpp
   FIRBuilder.cpp
   IntrinsicCall.cpp
diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
new file mode 100644
index 0000000000000..1bac6884a5f7e
--- /dev/null
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -0,0 +1,95 @@
+//===-- ConvertExpr.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Common/idioms.h"
+#include "flang/Lower/IntrinsicCall.h"
+#include "flang/Lower/Support/BoxValue.h"
+
+mlir::Value fir::getBase(const fir::ExtendedValue &ex) {
+  return std::visit(Fortran::common::visitors{
+                        [](const fir::UnboxedValue &x) { return x; },
+                        [](const auto &x) { return x.getAddr(); },
+                    },
+                    ex.box);
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::CharBoxValue &box) {
+  os << "boxchar { addr: " << box.getAddr() << ", len: " << box.getLen()
+     << " }";
+  return os;
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::ArrayBoxValue &box) {
+  os << "boxarray { addr: " << box.getAddr();
+  if (box.getLBounds().size()) {
+    os << ", lbounds: [";
+    llvm::interleaveComma(box.getLBounds(), os);
+    os << "]";
+  } else {
+    os << ", lbounds: all-ones";
+  }
+  os << ", shape: [";
+  llvm::interleaveComma(box.getExtents(), os);
+  os << "]}";
+  return os;
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::CharArrayBoxValue &box) {
+  os << "boxchararray { addr: " << box.getAddr() << ", len : " << box.getLen();
+  if (box.getLBounds().size()) {
+    os << ", lbounds: [";
+    llvm::interleaveComma(box.getLBounds(), os);
+    os << "]";
+  } else {
+    os << " lbounds: all-ones";
+  }
+  os << ", shape: [";
+  llvm::interleaveComma(box.getExtents(), os);
+  os << "]}";
+  return os;
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::BoxValue &box) {
+  os << "box { addr: " << box.getAddr();
+  if (box.getLen())
+    os << ", size: " << box.getLen();
+  if (box.params.size()) {
+    os << ", type params: [";
+    llvm::interleaveComma(box.params, os);
+    os << "]";
+  }
+  if (box.getLBounds().size()) {
+    os << ", lbounds: [";
+    llvm::interleaveComma(box.getLBounds(), os);
+    os << "]";
+  }
+  if (box.getExtents().size()) {
+    os << ", shape: [";
+    llvm::interleaveComma(box.getExtents(), os);
+    os << "]";
+  }
+  os << "}";
+  return os;
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::ProcBoxValue &box) {
+  os << "boxproc: { addr: " << box.getAddr() << ", context: " << box.hostContext
+     << "}";
+  return os;
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::ExtendedValue &ex) {
+  std::visit([&](const auto &value) { os << value; }, ex.box);
+  return os;
+}
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 5eb6a1866d293..e839c14a2a5d1 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -5,18 +5,100 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
 
 #include "flang/Lower/OpenMP.h"
 #include "flang/Lower/Bridge.h"
+#include "flang/Lower/FIRBuilder.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Parser/parse-tree.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 #define TODO() llvm_unreachable("not yet implemented")
 
+static void genOMP(Fortran::lower::AbstractConverter &absConv,
+                   Fortran::lower::pft::Evaluation &eval,
+                   const Fortran::parser::OpenMPSimpleStandaloneConstruct
+                       &simpleStandaloneConstruct) {
+  const auto &directive =
+      std::get<Fortran::parser::OmpSimpleStandaloneDirective>(
+          simpleStandaloneConstruct.t);
+  switch (directive.v) {
+  default:
+    break;
+  case llvm::omp::Directive::OMPD_barrier:
+    absConv.getFirOpBuilder().create<mlir::omp::BarrierOp>(
+        absConv.getCurrentLocation());
+    break;
+  case llvm::omp::Directive::OMPD_taskwait:
+    absConv.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(
+        absConv.getCurrentLocation());
+    break;
+  case llvm::omp::Directive::OMPD_taskyield:
+    TODO();
+  case llvm::omp::Directive::OMPD_target_enter_data:
+    TODO();
+  case llvm::omp::Directive::OMPD_target_exit_data:
+    TODO();
+  case llvm::omp::Directive::OMPD_target_update:
+    TODO();
+  case llvm::omp::Directive::OMPD_ordered:
+    TODO();
+  }
+}
+
+static void
+genOMP(Fortran::lower::AbstractConverter &absConv,
+       Fortran::lower::pft::Evaluation &eval,
+       const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) {
+  std::visit(
+      Fortran::common::visitors{
+          [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct
+                  &simpleStandaloneConstruct) {
+            genOMP(absConv, eval, simpleStandaloneConstruct);
+          },
+          [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) {
+            TODO();
+          },
+          [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) {
+            TODO();
+          },
+          [&](const Fortran::parser::OpenMPCancellationPointConstruct
+                  &cancellationPointConstruct) { TODO(); },
+      },
+      standaloneConstruct.u);
+}
+
 void Fortran::lower::genOpenMPConstruct(
-    Fortran::lower::AbstractConverter &, Fortran::lower::pft::Evaluation &,
-    const Fortran::parser::OpenMPConstruct &) {
-  TODO();
+    Fortran::lower::AbstractConverter &absConv,
+    Fortran::lower::pft::Evaluation &eval,
+    const Fortran::parser::OpenMPConstruct &ompConstruct) {
+
+  std::visit(
+      common::visitors{
+          [&](const Fortran::parser::OpenMPStandaloneConstruct
+                  &standaloneConstruct) {
+            genOMP(absConv, eval, standaloneConstruct);
+          },
+          [&](const Fortran::parser::OpenMPSectionsConstruct
+                  §ionsConstruct) { TODO(); },
+          [&](const Fortran::parser::OpenMPLoopConstruct &loopConstruct) {
+            TODO();
+          },
+          [&](const Fortran::parser::OpenMPBlockConstruct &blockConstruct) {
+            TODO();
+          },
+          [&](const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) {
+            TODO();
+          },
+          [&](const Fortran::parser::OpenMPCriticalConstruct
+                  &criticalConstruct) { TODO(); },
+      },
+      ompConstruct.u);
 }
 
 void Fortran::lower::genOpenMPEndLoop(
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 30cd365f139bc..36334167184d5 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -968,19 +968,12 @@ static mlir::LogicalResult verify(fir::ResultOp op) {
   auto results = parentOp->getResults();
   auto operands = op.getOperands();
 
-  if (isa<fir::WhereOp>(parentOp) || isa<fir::LoopOp>(parentOp) ||
-      isa<fir::IterWhileOp>(parentOp)) {
-    if (parentOp->getNumResults() != op.getNumOperands())
-      return op.emitOpError() << "parent of result must have same arity";
-    for (auto e : llvm::zip(results, operands)) {
-      if (std::get<0>(e).getType() != std::get<1>(e).getType())
-        return op.emitOpError()
-               << "types mismatch between result op and its parent";
-    }
-  } else {
-    return op.emitOpError()
-           << "result only terminates if, do_loop, or iterate_while regions";
-  }
+  if (parentOp->getNumResults() != op.getNumOperands())
+    return op.emitOpError() << "parent of result must have same arity";
+  for (auto e : llvm::zip(results, operands))
+    if (std::get<0>(e).getType() != std::get<1>(e).getType())
+      return op.emitOpError()
+             << "types mismatch between result op and its parent";
   return success();
 }
 
@@ -1395,15 +1388,28 @@ mlir::OpFoldResult fir::SubfOp::fold(llvm::ArrayRef opnds) {
 //===----------------------------------------------------------------------===//
 // WhereOp
 //===----------------------------------------------------------------------===//
-
 void fir::WhereOp::build(mlir::OpBuilder &builder, OperationState &result,
                          mlir::Value cond, bool withElseRegion) {
+  build(builder, result, llvm::None, cond, withElseRegion);
+}
+
+void fir::WhereOp::build(mlir::OpBuilder &builder, OperationState &result,
+                         mlir::TypeRange resultTypes, mlir::Value cond,
+                         bool withElseRegion) {
   result.addOperands(cond);
+  result.addTypes(resultTypes);
+
   mlir::Region *thenRegion = result.addRegion();
+  thenRegion->push_back(new mlir::Block());
+  if (resultTypes.empty())
+    WhereOp::ensureTerminator(*thenRegion, builder, result.location);
+
   mlir::Region *elseRegion = result.addRegion();
-  WhereOp::ensureTerminator(*thenRegion, builder, result.location);
-  if (withElseRegion)
-    WhereOp::ensureTerminator(*elseRegion, builder, result.location);
+  if (withElseRegion) {
+    elseRegion->push_back(new mlir::Block());
+    if (resultTypes.empty())
+      WhereOp::ensureTerminator(*elseRegion, builder, result.location);
+  }
 }
 
 static mlir::ParseResult parseWhereOp(OpAsmParser &parser,
@@ -1439,16 +1445,6 @@ static mlir::ParseResult parseWhereOp(OpAsmParser &parser,
 }
 
 static LogicalResult verify(fir::WhereOp op) {
-  // Verify that the entry of each child region does not have arguments.
-  for (auto ®ion : op.getOperation()->getRegions()) {
-    if (region.empty())
-      continue;
-
-    for (auto &b : region)
-      if (b.getNumArguments() != 0)
-        return op.emitOpError(
-            "requires that child entry blocks have no arguments");
-  }
   if (op.getNumResults() != 0 && op.otherRegion().empty())
     return op.emitOpError("must have an else block if defining values");
 
diff --git a/flang/lib/Parser/CMakeLists.txt b/flang/lib/Parser/CMakeLists.txt
index eb5126e1b937e..9ee4168031771 100644
--- a/flang/lib/Parser/CMakeLists.txt
+++ b/flang/lib/Parser/CMakeLists.txt
@@ -11,6 +11,7 @@ add_flang_library(FortranParser
   instrumented-parser.cpp
   io-parsers.cpp
   message.cpp
+  openacc-parsers.cpp
   openmp-parsers.cpp
   parse-tree.cpp
   parsing.cpp
@@ -29,7 +30,9 @@ add_flang_library(FortranParser
 
   LINK_COMPONENTS
   Support
+  FrontendOpenACC
 
   DEPENDS
   omp_gen
+  acc_gen
 )
diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp
index 3192781d4bcc9..f46186323ada1 100644
--- a/flang/lib/Parser/Fortran-parsers.cpp
+++ b/flang/lib/Parser/Fortran-parsers.cpp
@@ -1173,7 +1173,9 @@ constexpr auto ignore_tkr{
                              defaulted(parenthesized(some("tkr"_ch))), name))};
 TYPE_PARSER(
    beginDirective >> sourced(construct<CompilerDirective>(ignore_tkr) ||
-                          construct<CompilerDirective>("DIR$" >> many(name))) /
+                          construct<CompilerDirective>("DIR$" >>
+                              many(construct<CompilerDirective::NameValue>(
+                                  name, maybe("=" >> digitString64))))) /
         endDirective)
 
 TYPE_PARSER(extension(construct(
diff --git a/flang/lib/Parser/char-buffer.cpp b/flang/lib/Parser/char-buffer.cpp
index e0fc7335424ba..780d7e89538f7 100644
--- a/flang/lib/Parser/char-buffer.cpp
+++ b/flang/lib/Parser/char-buffer.cpp
@@ -65,26 +65,4 @@ std::string CharBuffer::Marshal() const {
   CHECK(result.size() == bytes_);
   return result;
 }
-
-std::string CharBuffer::MarshalNormalized() const {
-  std::string result;
-  std::size_t bytes{bytes_};
-  result.reserve(bytes + 1 /* for terminal line feed */);
-  char ch{'\0'};
-  for (const Block &block : blocks_) {
-    std::size_t chunk{std::min(bytes, Block::capacity)};
-    for (std::size_t j{0}; j < chunk; ++j) {
-      ch = block.data[j];
-      if (ch != '\r') {
-        result += ch;
-      }
-    }
-    bytes -= chunk;
-  }
-  if (ch != '\n') {
-    result += '\n';
-  }
-  result.shrink_to_fit();
-  return result;
-}
 } // namespace Fortran::parser
diff --git a/flang/lib/Parser/executable-parsers.cpp b/flang/lib/Parser/executable-parsers.cpp
index 160b2dc376a48..d6dd4688dbac1 100644
--- a/flang/lib/Parser/executable-parsers.cpp
+++ b/flang/lib/Parser/executable-parsers.cpp
@@ -50,6 +50,7 @@ constexpr auto executableConstruct{
         construct(indirect(whereConstruct)),
         construct(indirect(forallConstruct)),
         construct(indirect(ompEndLoopDirective)),
+        construct(indirect(openaccConstruct)),
         construct(indirect(openmpConstruct)),
         construct(indirect(compilerDirective)))};
 
diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp
new file mode 100644
index 0000000000000..a2ab628c0993a
--- /dev/null
+++ b/flang/lib/Parser/openacc-parsers.cpp
@@ -0,0 +1,284 @@
+//===-- lib/Parser/openacc-parsers.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Top-level grammar specification for OpenACC 3.0.
+
+#include "basic-parsers.h"
+#include "expr-parsers.h"
+#include "misc-parsers.h"
+#include "stmt-parser.h"
+#include "token-parsers.h"
+#include "type-parser-implementation.h"
+#include "flang/Parser/parse-tree.h"
+
+// OpenACC Directives and Clauses
+namespace Fortran::parser {
+
+constexpr auto startAccLine = skipStuffBeforeStatement >> "!$ACC "_sptok;
+constexpr auto endAccLine = space >> endOfLine;
+
+// Basic clauses
+TYPE_PARSER("AUTO" >> construct(construct()) ||
+    "ASYNC" >> construct(construct(
+                   maybe(parenthesized(scalarIntExpr)))) ||
+    "ATTACH" >> construct(construct(
+                    parenthesized(Parser{}))) ||
+    "BIND" >>
+        construct(construct(parenthesized(name))) ||
+    "CAPTURE" >> construct(construct()) ||
+    "COLLAPSE" >> construct(construct(
+                      parenthesized(scalarIntConstantExpr))) ||
+    ("COPY"_tok || "PRESENT_OR_COPY"_tok || "PCOPY"_tok) >>
+        construct(construct(
+            parenthesized(Parser{}))) ||
+    ("COPYIN"_tok || "PRESENT_OR_COPYIN"_tok || "PCOPYIN"_tok) >>
+        construct(construct(
+            parenthesized(Parser{}))) ||
+    ("COPYOUT"_tok || "PRESENT_OR_COPYOUT"_tok || "PCOPYOUT"_tok) >>
+        construct(construct(
+            parenthesized(Parser{}))) ||
+    ("CREATE"_tok || "PRESENT_OR_CREATE"_tok || "PCREATE"_tok) >>
+        construct(construct(
+            parenthesized(Parser{}))) ||
+    "DEFAULT" >> construct(construct(
+                     Parser{})) ||
+    "DEFAULT_ASYNC" >> construct(construct(
+                           parenthesized(scalarIntExpr))) ||
+    "DELETE" >> construct(construct(
+                    parenthesized(Parser{}))) ||
+    "DETACH" >> construct(construct(
+                    parenthesized(Parser{}))) ||
+    "DEVICE" >> construct(construct(
+                    parenthesized(Parser{}))) ||
+    "DEVICEPTR" >> construct(construct(
+                       parenthesized(Parser{}))) ||
+    "DEVICENUM" >> construct(construct(
+                       parenthesized(scalarIntConstantExpr))) ||
+    "DEVICE_RESIDENT" >>
+        construct(construct(
+            parenthesized(Parser{}))) ||
+    ("DEVICE_TYPE"_tok || "DTYPE"_tok) >>
+        construct(construct(parenthesized(
+            "*" >> construct>>()))) ||
+    ("DEVICE_TYPE"_tok || "DTYPE"_tok) >>
+        construct(construct(
+            parenthesized(maybe(nonemptyList(name))))) ||
+    "FINALIZE" >> construct(construct()) ||
+    "FIRSTPRIVATE" >> construct(construct(
+                          parenthesized(Parser{}))) ||
+    "GANG" >> construct(construct(
+                  maybe(parenthesized(Parser{})))) ||
+    "HOST" >> construct(construct(
+                  parenthesized(Parser{}))) ||
+    "IF" >> construct(
+                construct(parenthesized(scalarLogicalExpr))) ||
+    "IF_PRESENT" >> construct(construct()) ||
+    "INDEPENDENT" >>
+        construct(construct()) ||
+    "LINK" >> construct(construct(
+                  parenthesized(Parser{}))) ||
+    "NO_CREATE" >> construct(construct(
+                       parenthesized(Parser{}))) ||
+    "NOHOST" >> construct(construct()) ||
+    "NUM_GANGS" >> construct(construct(
+                       parenthesized(scalarIntExpr))) ||
+    "NUM_WORKERS" >> construct(construct(
+                         parenthesized(scalarIntExpr))) ||
+    "PRESENT" >> construct(construct(
+                     parenthesized(Parser{}))) ||
+    "PRIVATE" >> construct(construct(
+                     parenthesized(Parser{}))) ||
+    "READ" >> construct(construct()) ||
+    "REDUCTION" >> construct(construct(
+                       parenthesized(construct(
+                           Parser{} / ":",
+                           Parser{})))) ||
+    "SELF" >> construct(construct(
+                  maybe(parenthesized(scalarLogicalExpr)))) ||
+    "SEQ" >> construct(construct()) ||
+    "TILE" >> construct(construct(
+                  parenthesized(Parser{}))) ||
+    "USE_DEVICE" >> construct(construct(
+                        parenthesized(Parser{}))) ||
+    "VECTOR_LENGTH" >> construct(construct(
+                           parenthesized(scalarIntExpr))) ||
+    "VECTOR" >>
+        construct(construct(maybe(
+            parenthesized(("LENGTH:" >> scalarIntExpr || scalarIntExpr))))) ||
+    "WAIT" >> construct(construct(
+                  maybe(Parser{}))) ||
+    "WORKER" >>
+        construct(construct(maybe(
+            parenthesized(("NUM:" >> scalarIntExpr || scalarIntExpr))))) ||
+    "WRITE" >> construct(construct()))
+
+TYPE_PARSER(
+    construct<AccObject>(designator) || construct<AccObject>("/" >> name / "/"))
+
+TYPE_PARSER(construct<AccObjectList>(nonemptyList(Parser<AccObject>{})))
+
+TYPE_PARSER(construct(
+    maybe(Parser{}), Parser{}))
+
+TYPE_PARSER(construct(
+    maybe("DEVNUM:" >> scalarIntExpr / ":"), nonemptyList(scalarIntExpr)))
+
+// 2.9 (1609) size-expr is one of:
+//   int-expr
+TYPE_PARSER(construct(scalarIntExpr) ||
+    construct("*" >> maybe(scalarIntExpr)))
+TYPE_PARSER(construct(nonemptyList(Parser{})))
+
+// 2.9 (1607) gang-arg is one of:
+//   [num:]int-expr
+//   static:size-expr
+TYPE_PARSER(construct(maybe(scalarIntExpr),
+                maybe(","_tok / "STATIC:" >> Parser{})) ||
+    construct(maybe("NUM:" >> scalarIntExpr),
+        maybe(","_tok / "STATIC:" >> Parser{})))
+
+// 2.5.13 Reduction
+TYPE_PARSER(construct(Parser{}) ||
+    construct(Parser{}))
+
+// 2.5.14 Default clause
+TYPE_PARSER(construct(
+    parenthesized(first("NONE" >> pure(AccDefaultClause::Arg::None),
+        "PRESENT" >> pure(AccDefaultClause::Arg::Present)))))
+
+// Modifier for copyin, copyout, cache and create
+TYPE_PARSER(construct(
+    first("ZERO:" >> pure(AccDataModifier::Modifier::Zero),
+        "READONLY:" >> pure(AccDataModifier::Modifier::ReadOnly))))
+
+// Combined directives
+TYPE_PARSER(sourced(construct(
+    first("KERNELS LOOP" >> pure(llvm::acc::Directive::ACCD_kernels_loop),
+        "PARALLEL LOOP" >> pure(llvm::acc::Directive::ACCD_parallel_loop),
+        "SERIAL LOOP" >> pure(llvm::acc::Directive::ACCD_serial_loop)))))
+
+// Block directives
+TYPE_PARSER(sourced(construct(
+    first("DATA" >> pure(llvm::acc::Directive::ACCD_data),
+        "HOST_DATA" >> pure(llvm::acc::Directive::ACCD_host_data),
+        "KERNELS" >> pure(llvm::acc::Directive::ACCD_kernels),
+        "PARALLEL" >> pure(llvm::acc::Directive::ACCD_parallel),
+        "SERIAL" >> pure(llvm::acc::Directive::ACCD_serial)))))
+
+// Standalone directives
+TYPE_PARSER(sourced(construct(
+    first("ENTER DATA" >> pure(llvm::acc::Directive::ACCD_enter_data),
+        "EXIT DATA" >> pure(llvm::acc::Directive::ACCD_exit_data),
+        "INIT" >> pure(llvm::acc::Directive::ACCD_init),
+        "SHUTDOWN" >> pure(llvm::acc::Directive::ACCD_shutdown),
+        "SET" >> pure(llvm::acc::Directive::ACCD_set),
+        "UPDATE" >> pure(llvm::acc::Directive::ACCD_update)))))
+
+// Loop directives
+TYPE_PARSER(sourced(construct(
+    first("LOOP" >> pure(llvm::acc::Directive::ACCD_loop)))))
+
+TYPE_PARSER(construct(
+    sourced(Parser{}), Parser{}))
+
+TYPE_PARSER(
+    construct(sourced(Parser{})))
+
+// 2.15.1 Routine directive
+TYPE_PARSER(sourced(construct(verbatim("ROUTINE"_tok),
+    maybe(parenthesized(name)), Parser{})))
+
+// 2.10 Cache directive
+TYPE_PARSER(sourced(
+    construct(sourced(construct("CACHE"_tok)),
+        parenthesized(Parser{}))))
+
+// 2.11 Combined constructs
+TYPE_PARSER(startAccLine >> construct(sourced(
+                                "END"_tok >> Parser{})))
+
+TYPE_PARSER(construct(
+    sourced(Parser{}), Parser{}))
+
+TYPE_PARSER(construct(
+    Parser{} / endAccLine, block,
+    maybe(Parser{} / endAccLine)))
+
+// 2.12 Atomic constructs
+TYPE_PARSER(construct(startAccLine >> "END ATOMIC"_tok))
+
+TYPE_PARSER("ATOMIC" >>
+    construct(verbatim("READ"_tok) / endAccLine,
+        statement(assignmentStmt), maybe(Parser{} / endAccLine)))
+
+TYPE_PARSER("ATOMIC" >>
+    construct(verbatim("WRITE"_tok) / endAccLine,
+        statement(assignmentStmt), maybe(Parser{} / endAccLine)))
+
+TYPE_PARSER("ATOMIC" >>
+    construct(maybe(verbatim("UPDATE"_tok)) / endAccLine,
+        statement(assignmentStmt), maybe(Parser{} / endAccLine)))
+
+TYPE_PARSER("ATOMIC" >>
+    construct(verbatim("CAPTURE"_tok) / endAccLine,
+        statement(assignmentStmt), statement(assignmentStmt),
+        Parser{} / endAccLine))
+
+TYPE_PARSER(construct(Parser{}) ||
+    construct(Parser{}) ||
+    construct(Parser{}) ||
+    construct(Parser{}))
+
+// 2.13 Declare constructs
+TYPE_PARSER(sourced(construct(
+    first("DECLARE" >> pure(llvm::acc::Directive::ACCD_declare)))))
+
+// [Clause, [Clause], ...]
+TYPE_PARSER(sourced(construct(
+    many(maybe(","_tok) >> sourced(Parser{})))))
+
+// 2.16.3 Wait directive
+TYPE_PARSER(sourced(construct(
+    sourced(construct("WAIT"_tok)),
+    maybe(parenthesized(Parser{})), Parser{})))
+
+// Block Constructs
+TYPE_PARSER(sourced(construct(
+    sourced(Parser{}), Parser{})))
+
+TYPE_PARSER(startAccLine >> sourced(construct("END"_tok >>
+                                sourced(Parser{}))))
+
+TYPE_PARSER(construct(
+    Parser{} / endAccLine, block,
+    Parser{} / endAccLine))
+
+// Standalone constructs
+TYPE_PARSER(construct(
+    sourced(Parser{}), Parser{}))
+
+// Standalone declarative constructs
+TYPE_PARSER(construct(
+    sourced(Parser{}), Parser{}))
+
+TYPE_PARSER(
+    startAccLine >> sourced(construct(
+                        Parser{})))
+
+// OpenACC constructs
+TYPE_CONTEXT_PARSER("OpenACC construct"_en_US,
+    startAccLine >>
+        first(construct(Parser{}),
+            construct(Parser{}),
+            construct(Parser{}),
+            construct(Parser{}),
+            construct(Parser{}),
+            construct(Parser{}),
+            construct(Parser{}),
+            construct(Parser{})))
+} // namespace Fortran::parser
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 41a97ff902d97..a09a5554116fb 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -23,10 +23,6 @@ namespace Fortran::parser {
 constexpr auto startOmpLine = skipStuffBeforeStatement >> "!$OMP "_sptok;
 constexpr auto endOmpLine = space >> endOfLine;
 
-template <typename A> constexpr decltype(auto) verbatim(A x) {
-  return sourced(construct(x));
-}
-
 // OpenMP Clauses
 // 2.15.3.1 DEFAULT (PRIVATE | FIRSTPRIVATE | SHARED | NONE)
 TYPE_PARSER(construct(
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index 5e12b5545f0ac..d7a7d107878d3 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -67,6 +67,9 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
   prescanner.set_fixedForm(options.isFixedForm)
       .set_fixedFormColumnLimit(options.fixedFormColumns)
       .AddCompilerDirectiveSentinel("dir$");
+  if (options.features.IsEnabled(LanguageFeature::OpenACC)) {
+    prescanner.AddCompilerDirectiveSentinel("$acc");
+  }
   if (options.features.IsEnabled(LanguageFeature::OpenMP)) {
     prescanner.AddCompilerDirectiveSentinel("$omp");
     prescanner.AddCompilerDirectiveSentinel("$"); // OMP conditional line
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 3b09597ddeb75..a1f07967d9b08 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -453,10 +453,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
           "# missing or invalid name"_err_en_US);
     } else {
-      j = dir.SkipBlanks(j + 1);
-      if (j != tokens) {
+      if (dir.IsAnythingLeft(++j)) {
         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
-            "#undef: excess tokens at end of directive"_err_en_US);
+            "#undef: excess tokens at end of directive"_en_US);
       } else {
         definitions_.erase(nameToken);
       }
@@ -468,8 +467,7 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
           "#%s: missing name"_err_en_US, dirName);
     } else {
-      j = dir.SkipBlanks(j + 1);
-      if (j != tokens) {
+      if (dir.IsAnythingLeft(++j)) {
         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
             "#%s: excess tokens at end of directive"_en_US, dirName);
       }
@@ -489,9 +487,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
           dir.GetTokenProvenanceRange(dirOffset));
     }
   } else if (dirName == "else") {
-    if (j != tokens) {
+    if (dir.IsAnythingLeft(j)) {
       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
-          "#else: excess tokens at end of directive"_err_en_US);
+          "#else: excess tokens at end of directive"_en_US);
     } else if (ifStack_.empty()) {
       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
@@ -516,9 +514,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
           dir.GetTokenProvenanceRange(dirOffset));
     }
   } else if (dirName == "endif") {
-    if (j != tokens) {
+    if (dir.IsAnythingLeft(j)) {
       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
-          "#endif: excess tokens at end of directive"_err_en_US);
+          "#endif: excess tokens at end of directive"_en_US);
     } else if (ifStack_.empty()) {
       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
diff --git a/flang/lib/Parser/program-parsers.cpp b/flang/lib/Parser/program-parsers.cpp
index fc2c7c324eb60..9e18c458ea3cb 100644
--- a/flang/lib/Parser/program-parsers.cpp
+++ b/flang/lib/Parser/program-parsers.cpp
@@ -60,7 +60,8 @@ TYPE_PARSER(construct(indirect(Parser{})) ||
 //         [use-stmt]... [import-stmt]... [implicit-part]
 //         [declaration-construct]...
 TYPE_CONTEXT_PARSER("specification part"_en_US,
-    construct(many(openmpDeclarativeConstruct),
+    construct(many(openaccDeclarativeConstruct),
+        many(openmpDeclarativeConstruct),
         many(statement(indirect(Parser{}))),
         many(unambiguousStatement(indirect(Parser{}))),
         implicitPart, many(declarationConstruct)))
@@ -75,10 +76,10 @@ TYPE_CONTEXT_PARSER("specification part"_en_US,
 // are in contexts that impose constraints on the kinds of statements that
 // are allowed, and so we have a variant production for declaration-construct
 // that implements those constraints.
-constexpr auto execPartLookAhead{
-    first(actionStmt >> ok, ompEndLoopDirective >> ok, openmpConstruct >> ok,
-        "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok,
-        "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)};
+constexpr auto execPartLookAhead{first(actionStmt >> ok,
+    ompEndLoopDirective >> ok, openaccConstruct >> ok, openmpConstruct >> ok,
+    "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok,
+    "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)};
 constexpr auto declErrorRecovery{
     stmtErrorRecoveryStart >> !execPartLookAhead >> skipStmtErrorRecovery};
 constexpr auto misplacedSpecificationStmt{Parser{} >>
@@ -126,7 +127,8 @@ constexpr auto limitedDeclarationConstruct{recovery(
 // specialized error recovery in the event of a spurious executable
 // statement.
 constexpr auto limitedSpecificationPart{inContext("specification part"_en_US,
-    construct(many(openmpDeclarativeConstruct),
+    construct(many(openaccDeclarativeConstruct),
+        many(openmpDeclarativeConstruct),
         many(statement(indirect(Parser{}))),
         many(unambiguousStatement(indirect(Parser{}))),
         implicitPart, many(limitedDeclarationConstruct)))};
@@ -151,6 +153,8 @@ TYPE_CONTEXT_PARSER("specification construct"_en_US,
         construct(
             statement(indirect(typeDeclarationStmt))),
         construct(indirect(Parser{})),
+        construct(
+            indirect(openaccDeclarativeConstruct)),
         construct(indirect(openmpDeclarativeConstruct)),
         construct(indirect(compilerDirective))))
 
diff --git a/flang/lib/Parser/source.cpp b/flang/lib/Parser/source.cpp
index 4f6c21fc2b48e..693138c2711cc 100644
--- a/flang/lib/Parser/source.cpp
+++ b/flang/lib/Parser/source.cpp
@@ -85,10 +85,19 @@ std::size_t RemoveCarriageReturns(llvm::MutableArrayRef buf) {
       break;
     }
     std::size_t chunk = crcp - p;
+    auto advance{chunk + 1};
+    if (chunk + 1 >= bytes || crcp[1] == '\n') {
+      // CR followed by LF or EOF: omit
+    } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
+      // CR preceded by LF or BOF: omit
+    } else {
+      // CR in line: retain
+      ++chunk;
+    }
     std::memmove(buffer + wrote, p, chunk);
     wrote += chunk;
-    p += chunk + 1;
-    bytes -= chunk + 1;
+    p += advance;
+    bytes -= advance;
   }
   return wrote;
 }
diff --git a/flang/lib/Parser/stmt-parser.h b/flang/lib/Parser/stmt-parser.h
index 7dcc1f4620a9d..cd1c69beedd4a 100644
--- a/flang/lib/Parser/stmt-parser.h
+++ b/flang/lib/Parser/stmt-parser.h
@@ -80,6 +80,7 @@ constexpr auto skipBadLine{SkipPast<'\n'>{} >> construct()};
 constexpr auto executionPartErrorRecovery{stmtErrorRecoveryStart >>
     !"END"_tok >> !"CONTAINS"_tok >> !"ELSE"_tok >> !"CASE"_tok >>
     !"TYPE IS"_tok >> !"CLASS"_tok >> !"RANK"_tok >>
+    !("!$ACC "_sptok >> "END"_tok) >>
     !("!$OMP "_sptok >> ("END"_tok || "SECTION"_id)) >> skipBadLine};
 
 // END statement error recovery
diff --git a/flang/lib/Parser/token-parsers.h b/flang/lib/Parser/token-parsers.h
index fe43182e386f7..2ad89053fc65b 100644
--- a/flang/lib/Parser/token-parsers.h
+++ b/flang/lib/Parser/token-parsers.h
@@ -664,5 +664,9 @@ constexpr auto logicalFALSE{
 constexpr auto rawHollerithLiteral{
     deprecated(HollerithLiteral{})};
 
+template  constexpr decltype(auto) verbatim(A x) {
+  return sourced(construct(x));
+}
+
 } // namespace Fortran::parser
 #endif // FORTRAN_PARSER_TOKEN_PARSERS_H_
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index ce94f26235013..07c5b12e5f759 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -56,6 +56,31 @@ std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
   return tokens; // even if at > tokens
 }
 
+// C-style /*comments*/ are removed from preprocessing directive
+// token sequences by the prescanner, but not C++ or Fortran
+// free-form line-ending comments (//...  and !...) because
+// ignoring them is directive-specific.
+bool TokenSequence::IsAnythingLeft(std::size_t at) const {
+  std::size_t tokens{start_.size()};
+  for (; at < tokens; ++at) {
+    auto tok{TokenAt(at)};
+    const char *end{tok.end()};
+    for (const char *p{tok.begin()}; p < end; ++p) {
+      switch (*p) {
+      case '/':
+        return p + 1 >= end || p[1] != '/';
+      case '!':
+        return false;
+      case ' ':
+        break;
+      default:
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 void TokenSequence::RemoveLastToken() {
   CHECK(!start_.empty());
   CHECK(nextStart_ > start_.back());
diff --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h
index d73b3c20be6f4..d98c0b955c5e9 100644
--- a/flang/lib/Parser/token-sequence.h
+++ b/flang/lib/Parser/token-sequence.h
@@ -71,6 +71,10 @@ class TokenSequence {
 
   std::size_t SkipBlanks(std::size_t) const;
 
+  // True if anything remains in the sequence at & after the given offset
+  // except blanks and line-ending C++ and Fortran free-form comments.
+  bool IsAnythingLeft(std::size_t) const;
+
   void PutNextTokenChar(char ch, Provenance provenance) {
     char_.emplace_back(ch);
     provenances_.Put({provenance, 1});
diff --git a/flang/lib/Parser/type-parsers.h b/flang/lib/Parser/type-parsers.h
index c7a1bce781fff..a2f38e90db212 100644
--- a/flang/lib/Parser/type-parsers.h
+++ b/flang/lib/Parser/type-parsers.h
@@ -130,6 +130,8 @@ constexpr Parser endSubroutineStmt; // R1537
 constexpr Parser entryStmt; // R1541
 constexpr Parser containsStmt; // R1543
 constexpr Parser compilerDirective;
+constexpr Parser openaccConstruct;
+constexpr Parser openaccDeclarativeConstruct;
 constexpr Parser openmpConstruct;
 constexpr Parser openmpDeclarativeConstruct;
 constexpr Parser ompEndLoopDirective;
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 09acaaa37076c..3b95636fc3e59 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -1761,7 +1761,9 @@ class UnparseVisitor {
               Word("!DIR$ IGNORE_TKR"); // emitted even if tkr list is empty
               Walk(" ", tkr, ", ");
             },
-            [&](const std::list &names) { Walk("!DIR$ ", names, " "); },
+            [&](const std::list &names) {
+              Walk("!DIR$ ", names, " ");
+            },
         },
         x.u);
     Put('\n');
@@ -1777,6 +1779,379 @@ class UnparseVisitor {
     }
     Walk(std::get(x.t));
   }
+  void Unparse(const CompilerDirective::NameValue &x) {
+    Walk(std::get(x.t));
+    Walk("=", std::get>(x.t));
+  }
+
+  // OpenACC Directives & Clauses
+  void Unparse(const AccAtomicCapture &x) {
+    BeginOpenACC();
+    Word("!$ACC CAPTURE");
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get(x.t));
+    Put("\n");
+    Walk(std::get(x.t));
+    BeginOpenACC();
+    Word("!$ACC END ATOMIC\n");
+    EndOpenACC();
+  }
+  void Unparse(const AccAtomicRead &x) {
+    BeginOpenACC();
+    Word("!$ACC ATOMIC READ");
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get>(x.t));
+    BeginOpenACC();
+    Walk(std::get>(x.t), "!$ACC END ATOMIC\n");
+    EndOpenACC();
+  }
+  void Unparse(const AccAtomicWrite &x) {
+    BeginOpenACC();
+    Word("!$ACC ATOMIC WRITE");
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get>(x.t));
+    BeginOpenACC();
+    Walk(std::get>(x.t), "!$ACC END ATOMIC\n");
+    EndOpenACC();
+  }
+  void Unparse(const AccAtomicUpdate &x) {
+    BeginOpenACC();
+    Word("!$ACC ATOMIC UPDATE");
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get>(x.t));
+    BeginOpenACC();
+    Walk(std::get>(x.t), "!$ACC END ATOMIC\n");
+    EndOpenACC();
+  }
+  void Unparse(const llvm::acc::Directive &x) {
+    Word(llvm::acc::getOpenACCDirectiveName(x).str());
+  }
+  void Before(const AccClause::Auto &) { Word("AUTO"); }
+  void Before(const AccClause::Capture &) { Word("CAPTURE"); }
+  void Before(const AccClause::Finalize &) { Word("FINALIZE"); }
+  void Before(const AccClause::IfPresent &) { Word("IF_PRESENT"); }
+  void Before(const AccClause::Independent &) { Word("INDEPENDENT"); }
+  void Before(const AccClause::NoHost &) { Word("NOHOST"); }
+  void Before(const AccClause::Read &) { Word("READ"); }
+  void Before(const AccClause::Seq &) { Word("SEQ"); }
+  void Before(const AccClause::Write &) { Word("WRITE"); }
+  void Before(const AccClause::Unknown &) { Word("UNKNOWN"); }
+  void Unparse(const AccClause::Attach &x) {
+    Word("ATTACH");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Bind &x) {
+    Word("BIND");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Collapse &x) {
+    Word("COLLAPSE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Copy &x) {
+    Word("COPY");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Copyin &x) {
+    Word("COPYIN");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Copyout &x) {
+    Word("COPYOUT");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Create &x) {
+    Word("CREATE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Default &x) {
+    Word("DEFAULT");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Delete &x) {
+    Word("DELETE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Detach &x) {
+    Word("DETACH");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Device &x) {
+    Word("DEVICE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::DevicePtr &x) {
+    Word("DEVICEPTR");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::DeviceResident &x) {
+    Word("DEVICE_RESIDENT");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::FirstPrivate &x) {
+    Word("FIRSTPRIVATE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Host &x) {
+    Word("HOST");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::If &x) {
+    Word("IF");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Link &x) {
+    Word("LINK");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::NumGangs &x) {
+    Word("NUM_GANGS");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::NumWorkers &x) {
+    Word("NUM_WORKERS");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Present &x) {
+    Word("PRESENT");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Private &x) {
+    Word("PRIVATE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Reduction &x) {
+    Word("REDUCTION");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::VectorLength &x) {
+    Word("VECTOR_LENGTH");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Async &x) {
+    Word("ASYNC");
+    Walk("(", x.v, ")");
+  }
+  void Unparse(const AccClause::DefaultAsync &x) {
+    Word("DEFAULT_ASYNC");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::DeviceNum &x) {
+    Word("DEVICE_NUM");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Gang &x) {
+    Word("GANG");
+    Walk("(", x.v, ")");
+  }
+  void Unparse(const AccClause::NoCreate &x) {
+    Word("NO_CREATE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::UseDevice &x) {
+    Word("USE_DEVICE");
+    Put("(");
+    Walk(x.v);
+    Put(")");
+  }
+  void Unparse(const AccClause::Self &x) {
+    Word("SELF");
+    Walk("(", x.v, ")");
+  }
+  void Unparse(const AccClause::Vector &x) {
+    Word("VECTOR");
+    Walk("(", x.v, ")");
+  }
+  void Unparse(const AccClause::Wait &x) {
+    Word("WAIT");
+    Walk("(", x.v, ")");
+  }
+  void Unparse(const AccClause::Worker &x) {
+    Word("WORKER");
+    Walk("(", x.v, ")");
+  }
+  void Unparse(const AccClause::DeviceType &x) {
+    Word("DEVICE_TYPE");
+    Put("(");
+    if (x.v.has_value())
+      Walk(x.v);
+    else
+      Put("*");
+    Put(")");
+  }
+  void Unparse(const AccObjectListWithModifier &x) {
+    Walk(std::get>(x.t), ":");
+    Walk(std::get(x.t));
+  }
+  void Unparse(const AccDataModifier::Modifier &x) {
+    Word(AccDataModifier::EnumToString(x));
+  }
+  void Unparse(const AccDefaultClause &x) {
+    switch (x.v) {
+    case AccDefaultClause::Arg::None:
+      Put("NONE");
+      break;
+    case AccDefaultClause::Arg::Present:
+      Put("PRESENT");
+      break;
+    }
+  }
+  void Unparse(const AccClauseList &x) { Walk(" ", x.v, " "); }
+  void Unparse(const AccGangArgument &x) {
+    Walk("NUM:", std::get>(x.t));
+    Walk(", STATIC:", std::get>(x.t));
+  }
+  void Unparse(const OpenACCBlockConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get(x.t), "");
+    BeginOpenACC();
+    Word("!$ACC END ");
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+  }
+  void Unparse(const OpenACCLoopConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get>(x.t));
+  }
+  void Unparse(const AccBeginLoopDirective &x) {
+    Walk(std::get(x.t));
+    Walk(std::get(x.t));
+  }
+  void Unparse(const OpenACCStandaloneConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Walk(std::get(x.t));
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+  }
+  void Unparse(const OpenACCStandaloneDeclarativeConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Walk(std::get(x.t));
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+  }
+  void Unparse(const OpenACCCombinedConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+    Walk(std::get(x.t), "");
+    BeginOpenACC();
+    Word("!$ACC END ");
+    Walk(std::get>(x.t));
+    Put("\n");
+    EndOpenACC();
+  }
+  void Unparse(const OpenACCRoutineConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ROUTINE");
+    Walk("(", std::get>(x.t), ")");
+    Walk(std::get(x.t));
+    Put("\n");
+    EndOpenACC();
+  }
+  void Unparse(const AccObject &x) {
+    std::visit(common::visitors{
+                   [&](const Designator &y) { Walk(y); },
+                   [&](const Name &y) { Put("/"), Walk(y), Put("/"); },
+               },
+        x.u);
+  }
+  void Unparse(const AccObjectList &x) { Walk(x.v, ","); }
+  void Unparse(const AccObjectListWithReduction &x) {
+    Walk(std::get(x.t));
+    Put(":");
+    Walk(std::get(x.t));
+  }
+  void Unparse(const OpenACCCacheConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Word("CACHE(");
+    Walk(std::get(x.t));
+    Put(")");
+    Put("\n");
+    EndOpenACC();
+  }
+  void Unparse(const OpenACCWaitConstruct &x) {
+    BeginOpenACC();
+    Word("!$ACC ");
+    Word("WAIT(");
+    Walk(std::get>(x.t));
+    Walk(std::get(x.t));
+    Put(")");
+    Put("\n");
+    EndOpenACC();
+  }
+
   // OpenMP Clauses & Directives
   void Unparse(const OmpObject &x) {
     std::visit(common::visitors{
@@ -2522,6 +2897,8 @@ class UnparseVisitor {
   }
   void BeginOpenMP() { openmpDirective_ = true; }
   void EndOpenMP() { openmpDirective_ = false; }
+  void BeginOpenACC() { openaccDirective_ = true; }
+  void EndOpenACC() { openaccDirective_ = false; }
 
   // Call back to the traversal framework.
   template  void Walk(const T &x) {
@@ -2591,6 +2968,7 @@ class UnparseVisitor {
   std::set structureComponents_;
   Encoding encoding_{Encoding::UTF_8};
   bool capitalizeKeywords_{true};
+  bool openaccDirective_{false};
   bool openmpDirective_{false};
   bool backslashEscapes_{false};
   preStatementType *preStatement_{nullptr};
@@ -2599,7 +2977,7 @@ class UnparseVisitor {
 
 void UnparseVisitor::Put(char ch) {
   int sav = indent_;
-  if (openmpDirective_) {
+  if (openmpDirective_ || openaccDirective_) {
     indent_ = 0;
   }
   if (column_ <= 1) {
@@ -2620,13 +2998,16 @@ void UnparseVisitor::Put(char ch) {
     if (openmpDirective_) {
       out_ << "!$OMP&";
       column_ = 8;
+    } else if (openaccDirective_) {
+      out_ << "!$ACC&";
+      column_ = 8;
     } else {
       out_ << '&';
       column_ = indent_ + 3;
     }
   }
   out_ << ch;
-  if (openmpDirective_) {
+  if (openmpDirective_ || openaccDirective_) {
     indent_ = sav;
   }
 }
diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt
index 05295f5900952..2bdc5f9582819 100644
--- a/flang/lib/Semantics/CMakeLists.txt
+++ b/flang/lib/Semantics/CMakeLists.txt
@@ -2,8 +2,10 @@
 add_flang_library(FortranSemantics
   assignment.cpp
   attr.cpp
+  canonicalize-acc.cpp
   canonicalize-do.cpp
   canonicalize-omp.cpp
+  check-acc-structure.cpp
   check-allocate.cpp
   check-arithmeticif.cpp
   check-call.cpp
@@ -50,4 +52,5 @@ add_flang_library(FortranSemantics
   LINK_COMPONENTS
   Support
   FrontendOpenMP
+  FrontendOpenACC
 )
diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp
new file mode 100644
index 0000000000000..4c4d716fe7def
--- /dev/null
+++ b/flang/lib/Semantics/canonicalize-acc.cpp
@@ -0,0 +1,84 @@
+//===-- lib/Semantics/canonicalize-acc.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "canonicalize-acc.h"
+#include "flang/Parser/parse-tree-visitor.h"
+#include "flang/Semantics/tools.h"
+
+// After loop canonicalization, rewrite the OpenACC parse tree to make OpenACC
+// constructs more structured, providing explicit scopes for later
+// structural checks and semantic analysis.
+//   1. move structured DoConstruct into
+//      OpenACCLoopConstruct. Compilation will not proceed in case of errors
+//      after this pass.
+namespace Fortran::semantics {
+
+using namespace parser::literals;
+
+class CanonicalizationOfAcc {
+public:
+  template  bool Pre(T &) { return true; }
+  template  void Post(T &) {}
+  CanonicalizationOfAcc(parser::Messages &messages) : messages_{messages} {}
+
+  void Post(parser::Block &block) {
+    for (auto it{block.begin()}; it != block.end(); ++it) {
+      if (auto *accLoop{parser::Unwrap(*it)}) {
+        RewriteOpenACCLoopConstruct(*accLoop, block, it);
+      }
+    } // Block list
+  }
+
+private:
+  void RewriteOpenACCLoopConstruct(parser::OpenACCLoopConstruct &x,
+      parser::Block &block, parser::Block::iterator it) {
+    // Check the sequence of DoConstruct in the same iteration
+    //
+    // Original:
+    //   ExecutableConstruct -> OpenACCConstruct -> OpenACCLoopConstruct
+    //     ACCBeginLoopDirective
+    //   ExecutableConstruct -> DoConstruct
+    //
+    // After rewriting:
+    //   ExecutableConstruct -> OpenACCConstruct -> OpenACCLoopConstruct
+    //     AccBeginLoopDirective
+    //     DoConstruct
+    parser::Block::iterator nextIt;
+    auto &beginDir{std::get(x.t)};
+    auto &dir{std::get(beginDir.t)};
+
+    nextIt = it;
+    if (++nextIt != block.end()) {
+      if (auto *doCons{parser::Unwrap(*nextIt)}) {
+        if (doCons->GetLoopControl()) {
+          // move DoConstruct
+          std::get>(x.t) =
+              std::move(*doCons);
+          nextIt = block.erase(nextIt);
+        } else {
+          messages_.Say(dir.source,
+              "DO loop after the %s directive must have loop control"_err_en_US,
+              parser::ToUpperCaseLetters(dir.source.ToString()));
+        }
+        return; // found do-loop
+      }
+    }
+    messages_.Say(dir.source,
+        "A DO loop must follow the %s directive"_err_en_US,
+        parser::ToUpperCaseLetters(dir.source.ToString()));
+  }
+
+  parser::Messages &messages_;
+};
+
+bool CanonicalizeAcc(parser::Messages &messages, parser::Program &program) {
+  CanonicalizationOfAcc acc{messages};
+  Walk(program, acc);
+  return !messages.AnyFatalError();
+}
+} // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/canonicalize-acc.h b/flang/lib/Semantics/canonicalize-acc.h
new file mode 100644
index 0000000000000..f24f9fbc44f3c
--- /dev/null
+++ b/flang/lib/Semantics/canonicalize-acc.h
@@ -0,0 +1,21 @@
+//===-- lib/Semantics/canonicalize-acc.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_SEMANTICS_CANONICALIZE_ACC_H_
+#define FORTRAN_SEMANTICS_CANONICALIZE_ACC_H_
+
+namespace Fortran::parser {
+struct Program;
+class Messages;
+} // namespace Fortran::parser
+
+namespace Fortran::semantics {
+bool CanonicalizeAcc(parser::Messages &messages, parser::Program &program);
+}
+
+#endif // FORTRAN_SEMANTICS_CANONICALIZE_ACC_H_
diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp
new file mode 100644
index 0000000000000..974c9dc59abe6
--- /dev/null
+++ b/flang/lib/Semantics/check-acc-structure.cpp
@@ -0,0 +1,501 @@
+//===-- lib/Semantics/check-acc-structure.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "check-acc-structure.h"
+#include "flang/Parser/parse-tree.h"
+#include "flang/Semantics/tools.h"
+
+#define CHECK_SIMPLE_CLAUSE(X, Y) \
+  void AccStructureChecker::Enter(const parser::AccClause::X &) { \
+    CheckAllowed(llvm::acc::Clause::Y); \
+  }
+
+#define CHECK_REQ_SCALAR_INT_CONSTANT_CLAUSE(X, Y) \
+  void AccStructureChecker::Enter(const parser::AccClause::X &c) { \
+    CheckAllowed(llvm::acc::Clause::Y); \
+    RequiresConstantPositiveParameter(llvm::acc::Clause::Y, c.v); \
+  }
+
+namespace Fortran::semantics {
+
+static constexpr inline AccClauseSet
+    parallelAndKernelsOnlyAllowedAfterDeviceTypeClauses{
+        llvm::acc::Clause::ACCC_async, llvm::acc::Clause::ACCC_wait,
+        llvm::acc::Clause::ACCC_num_gangs, llvm::acc::Clause::ACCC_num_workers,
+        llvm::acc::Clause::ACCC_vector_length};
+
+static constexpr inline AccClauseSet serialOnlyAllowedAfterDeviceTypeClauses{
+    llvm::acc::Clause::ACCC_async, llvm::acc::Clause::ACCC_wait};
+
+static constexpr inline AccClauseSet loopOnlyAllowedAfterDeviceTypeClauses{
+    llvm::acc::Clause::ACCC_auto, llvm::acc::Clause::ACCC_collapse,
+    llvm::acc::Clause::ACCC_independent, llvm::acc::Clause::ACCC_gang,
+    llvm::acc::Clause::ACCC_seq, llvm::acc::Clause::ACCC_tile,
+    llvm::acc::Clause::ACCC_vector, llvm::acc::Clause::ACCC_worker};
+
+static constexpr inline AccClauseSet updateOnlyAllowedAfterDeviceTypeClauses{
+    llvm::acc::Clause::ACCC_async, llvm::acc::Clause::ACCC_wait};
+
+static constexpr inline AccClauseSet routineOnlyAllowedAfterDeviceTypeClauses{
+    llvm::acc::Clause::ACCC_bind, llvm::acc::Clause::ACCC_gang,
+    llvm::acc::Clause::ACCC_vector, llvm::acc::Clause::ACCC_worker};
+
+class NoBranchingEnforce {
+public:
+  NoBranchingEnforce(SemanticsContext &context,
+      parser::CharBlock sourcePosition, llvm::acc::Directive directive)
+      : context_{context}, sourcePosition_{sourcePosition}, currentDirective_{
+                                                                directive} {}
+  template  bool Pre(const T &) { return true; }
+  template  void Post(const T &) {}
+
+  template  bool Pre(const parser::Statement &statement) {
+    currentStatementSourcePosition_ = statement.source;
+    return true;
+  }
+
+  void Post(const parser::ReturnStmt &) { emitBranchOutError("RETURN"); }
+  void Post(const parser::ExitStmt &) { emitBranchOutError("EXIT"); }
+  void Post(const parser::StopStmt &) { emitBranchOutError("STOP"); }
+
+private:
+  parser::MessageFixedText GetEnclosingMsg() {
+    return "Enclosing block construct"_en_US;
+  }
+
+  void emitBranchOutError(const char *stmt) {
+    context_
+        .Say(currentStatementSourcePosition_,
+            "%s statement is not allowed in a %s construct"_err_en_US, stmt,
+            parser::ToUpperCaseLetters(
+                llvm::acc::getOpenACCDirectiveName(currentDirective_).str()))
+        .Attach(sourcePosition_, GetEnclosingMsg());
+  }
+
+  SemanticsContext &context_;
+  parser::CharBlock currentStatementSourcePosition_;
+  parser::CharBlock sourcePosition_;
+  llvm::acc::Directive currentDirective_;
+};
+
+void AccStructureChecker::Enter(const parser::AccClause &x) {
+  SetContextClause(x);
+}
+
+void AccStructureChecker::Leave(const parser::AccClauseList &) {}
+
+void AccStructureChecker::Enter(const parser::OpenACCBlockConstruct &x) {
+  const auto &beginBlockDir{std::get(x.t)};
+  const auto &endBlockDir{std::get(x.t)};
+  const auto &beginAccBlockDir{
+      std::get(beginBlockDir.t)};
+
+  CheckMatching(beginAccBlockDir, endBlockDir.v);
+  PushContextAndClauseSets(beginAccBlockDir.source, beginAccBlockDir.v);
+}
+
+void AccStructureChecker::Leave(const parser::OpenACCBlockConstruct &x) {
+  const auto &beginBlockDir{std::get(x.t)};
+  const auto &blockDir{std::get(beginBlockDir.t)};
+  const parser::Block &block{std::get(x.t)};
+  switch (blockDir.v) {
+  case llvm::acc::Directive::ACCD_kernels:
+  case llvm::acc::Directive::ACCD_parallel:
+    // Restriction - 880-881 (KERNELS)
+    // Restriction - 843-844 (PARALLEL)
+    CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+        parallelAndKernelsOnlyAllowedAfterDeviceTypeClauses);
+    // Restriction - 877 (KERNELS)
+    // Restriction - 840 (PARALLEL)
+    CheckNoBranching(block, GetContext().directive, blockDir.source);
+    break;
+  case llvm::acc::Directive::ACCD_serial:
+    // Restriction - 919
+    CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+        serialOnlyAllowedAfterDeviceTypeClauses);
+    // Restriction - 916
+    CheckNoBranching(block, llvm::acc::Directive::ACCD_serial, blockDir.source);
+    break;
+  case llvm::acc::Directive::ACCD_data:
+    // Restriction - 1117-1118
+    CheckRequireAtLeastOneOf();
+    break;
+  case llvm::acc::Directive::ACCD_host_data:
+    // Restriction - 1578
+    CheckRequireAtLeastOneOf();
+    break;
+  default:
+    break;
+  }
+  accContext_.pop_back();
+}
+
+void AccStructureChecker::CheckNoBranching(const parser::Block &block,
+    const llvm::acc::Directive directive,
+    const parser::CharBlock &directiveSource) const {
+  NoBranchingEnforce noBranchingEnforce{context_, directiveSource, directive};
+  parser::Walk(block, noBranchingEnforce);
+}
+
+void AccStructureChecker::Enter(
+    const parser::OpenACCStandaloneDeclarativeConstruct &x) {
+  const auto &declarativeDir{std::get(x.t)};
+  PushContextAndClauseSets(declarativeDir.source, declarativeDir.v);
+}
+
+void AccStructureChecker::Leave(
+    const parser::OpenACCStandaloneDeclarativeConstruct &) {
+  // Restriction - 2075
+  CheckAtLeastOneClause();
+  accContext_.pop_back();
+}
+
+void AccStructureChecker::Enter(const parser::OpenACCCombinedConstruct &x) {
+  const auto &beginBlockDir{std::get(x.t)};
+  const auto &combinedDir{
+      std::get(beginBlockDir.t)};
+  PushContextAndClauseSets(combinedDir.source, combinedDir.v);
+}
+
+void AccStructureChecker::Leave(const parser::OpenACCCombinedConstruct &x) {
+  const auto &beginBlockDir{std::get(x.t)};
+  const auto &combinedDir{
+      std::get(beginBlockDir.t)};
+  switch (combinedDir.v) {
+  case llvm::acc::Directive::ACCD_kernels_loop:
+  case llvm::acc::Directive::ACCD_parallel_loop:
+    // Restriction - 1962 -> (880-881) (KERNELS LOOP)
+    // Restriction - 1962 -> (843-844) (PARALLEL LOOP)
+    CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+        {llvm::acc::Clause::ACCC_async, llvm::acc::Clause::ACCC_wait,
+            llvm::acc::Clause::ACCC_num_gangs,
+            llvm::acc::Clause::ACCC_num_workers,
+            llvm::acc::Clause::ACCC_vector_length});
+    break;
+  case llvm::acc::Directive::ACCD_serial_loop:
+    // Restriction - 1962 -> (919) (SERIAL LOOP)
+    CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+        {llvm::acc::Clause::ACCC_async, llvm::acc::Clause::ACCC_wait});
+    break;
+  default:
+    break;
+  }
+  accContext_.pop_back();
+}
+
+std::string AccStructureChecker::ContextDirectiveAsFortran() {
+  return parser::ToUpperCaseLetters(
+      llvm::acc::getOpenACCDirectiveName(GetContext().directive).str());
+}
+
+void AccStructureChecker::Enter(const parser::OpenACCLoopConstruct &x) {
+  const auto &beginDir{std::get(x.t)};
+  const auto &loopDir{std::get(beginDir.t)};
+  PushContextAndClauseSets(loopDir.source, loopDir.v);
+}
+
+void AccStructureChecker::Leave(const parser::OpenACCLoopConstruct &x) {
+  const auto &beginDir{std::get(x.t)};
+  const auto &loopDir{std::get(beginDir.t)};
+  if (loopDir.v == llvm::acc::Directive::ACCD_loop) {
+    // Restriction - 1615-1616
+    CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+        loopOnlyAllowedAfterDeviceTypeClauses);
+    // Restriction - 1622
+    CheckNotAllowedIfClause(llvm::acc::Clause::ACCC_seq,
+        {llvm::acc::Clause::ACCC_gang, llvm::acc::Clause::ACCC_vector,
+            llvm::acc::Clause::ACCC_worker});
+  }
+  accContext_.pop_back();
+}
+
+void AccStructureChecker::Enter(const parser::OpenACCStandaloneConstruct &x) {
+  const auto &standaloneDir{std::get(x.t)};
+  PushContextAndClauseSets(standaloneDir.source, standaloneDir.v);
+}
+
+void AccStructureChecker::Leave(const parser::OpenACCStandaloneConstruct &x) {
+  const auto &standaloneDir{std::get(x.t)};
+  switch (standaloneDir.v) {
+  case llvm::acc::Directive::ACCD_enter_data:
+  case llvm::acc::Directive::ACCD_exit_data:
+  case llvm::acc::Directive::ACCD_set:
+    // Restriction - 1117-1118 (ENTER DATA)
+    // Restriction - 1161-1162 (EXIT DATA)
+    // Restriction - 2254 (SET)
+    CheckRequireAtLeastOneOf();
+    break;
+  case llvm::acc::Directive::ACCD_update:
+    // Restriction - 2301
+    CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+        updateOnlyAllowedAfterDeviceTypeClauses);
+    break;
+  default:
+    break;
+  }
+  accContext_.pop_back();
+}
+
+void AccStructureChecker::Enter(const parser::OpenACCRoutineConstruct &x) {
+  PushContextAndClauseSets(x.source, llvm::acc::Directive::ACCD_routine);
+}
+void AccStructureChecker::Leave(const parser::OpenACCRoutineConstruct &) {
+  // Restriction - 2409
+  CheckRequireAtLeastOneOf();
+  // Restriction - 2407-2408
+  CheckOnlyAllowedAfter(llvm::acc::Clause::ACCC_device_type,
+      routineOnlyAllowedAfterDeviceTypeClauses);
+  accContext_.pop_back();
+}
+
+// Clause checkers
+// NOTE(review): the macro definitions are outside this chunk. Presumably
+// CHECK_SIMPLE_CLAUSE expands to an Enter() member that only calls
+// CheckAllowed() for the clause, and CHECK_REQ_SCALAR_INT_CONSTANT_CLAUSE
+// additionally requires a constant positive integer parameter — confirm
+// against the macro definitions.
+CHECK_REQ_SCALAR_INT_CONSTANT_CLAUSE(Collapse, ACCC_collapse)
+
+CHECK_SIMPLE_CLAUSE(Auto, ACCC_auto)
+CHECK_SIMPLE_CLAUSE(Async, ACCC_async)
+CHECK_SIMPLE_CLAUSE(Attach, ACCC_attach)
+CHECK_SIMPLE_CLAUSE(Bind, ACCC_bind)
+CHECK_SIMPLE_CLAUSE(Capture, ACCC_capture)
+CHECK_SIMPLE_CLAUSE(Copy, ACCC_copy)
+CHECK_SIMPLE_CLAUSE(Default, ACCC_default)
+CHECK_SIMPLE_CLAUSE(DefaultAsync, ACCC_default_async)
+CHECK_SIMPLE_CLAUSE(Delete, ACCC_delete)
+CHECK_SIMPLE_CLAUSE(Detach, ACCC_detach)
+CHECK_SIMPLE_CLAUSE(Device, ACCC_device)
+CHECK_SIMPLE_CLAUSE(DeviceNum, ACCC_device_num)
+CHECK_SIMPLE_CLAUSE(DevicePtr, ACCC_deviceptr)
+CHECK_SIMPLE_CLAUSE(DeviceResident, ACCC_device_resident)
+CHECK_SIMPLE_CLAUSE(DeviceType, ACCC_device_type)
+CHECK_SIMPLE_CLAUSE(Finalize, ACCC_finalize)
+CHECK_SIMPLE_CLAUSE(FirstPrivate, ACCC_firstprivate)
+CHECK_SIMPLE_CLAUSE(Gang, ACCC_gang)
+CHECK_SIMPLE_CLAUSE(Host, ACCC_host)
+CHECK_SIMPLE_CLAUSE(If, ACCC_if)
+CHECK_SIMPLE_CLAUSE(IfPresent, ACCC_if_present)
+CHECK_SIMPLE_CLAUSE(Independent, ACCC_independent)
+CHECK_SIMPLE_CLAUSE(Link, ACCC_link)
+CHECK_SIMPLE_CLAUSE(NoCreate, ACCC_no_create)
+CHECK_SIMPLE_CLAUSE(NoHost, ACCC_nohost)
+CHECK_SIMPLE_CLAUSE(NumGangs, ACCC_num_gangs)
+CHECK_SIMPLE_CLAUSE(NumWorkers, ACCC_num_workers)
+CHECK_SIMPLE_CLAUSE(Present, ACCC_present)
+CHECK_SIMPLE_CLAUSE(Private, ACCC_private)
+CHECK_SIMPLE_CLAUSE(Read, ACCC_read)
+CHECK_SIMPLE_CLAUSE(Reduction, ACCC_reduction)
+CHECK_SIMPLE_CLAUSE(Self, ACCC_self)
+CHECK_SIMPLE_CLAUSE(Seq, ACCC_seq)
+CHECK_SIMPLE_CLAUSE(Tile, ACCC_tile)
+CHECK_SIMPLE_CLAUSE(UseDevice, ACCC_use_device)
+CHECK_SIMPLE_CLAUSE(Vector, ACCC_vector)
+CHECK_SIMPLE_CLAUSE(VectorLength, ACCC_vector_length)
+CHECK_SIMPLE_CLAUSE(Wait, ACCC_wait)
+CHECK_SIMPLE_CLAUSE(Worker, ACCC_worker)
+CHECK_SIMPLE_CLAUSE(Write, ACCC_write)
+
+// CREATE clause: besides the generic allowed-clause check, the only data
+// modifier accepted is ZERO (i.e. create(zero: ...)); any other modifier is
+// diagnosed.
+void AccStructureChecker::Enter(const parser::AccClause::Create &c) {
+  CheckAllowed(llvm::acc::Clause::ACCC_create);
+  const auto &modifierClause{c.v};
+  if (const auto &modifier{
+          std::get>(modifierClause.t)}) {
+    if (modifier->v != parser::AccDataModifier::Modifier::Zero) {
+      context_.Say(GetContext().clauseSource,
+          "Only the ZERO modifier is allowed for the %s clause "
+          "on the %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(llvm::acc::Clause::ACCC_create)
+                  .str()),
+          ContextDirectiveAsFortran());
+    }
+  }
+}
+
+// COPYIN clause: the only data modifier accepted is READONLY
+// (i.e. copyin(readonly: ...)); any other modifier is diagnosed.
+void AccStructureChecker::Enter(const parser::AccClause::Copyin &c) {
+  CheckAllowed(llvm::acc::Clause::ACCC_copyin);
+  const auto &modifierClause{c.v};
+  if (const auto &modifier{
+          std::get>(modifierClause.t)}) {
+    if (modifier->v != parser::AccDataModifier::Modifier::ReadOnly) {
+      context_.Say(GetContext().clauseSource,
+          "Only the READONLY modifier is allowed for the %s clause "
+          "on the %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(llvm::acc::Clause::ACCC_copyin)
+                  .str()),
+          ContextDirectiveAsFortran());
+    }
+  }
+}
+
+// COPYOUT clause: the only data modifier accepted is ZERO
+// (i.e. copyout(zero: ...)); any other modifier is diagnosed.
+void AccStructureChecker::Enter(const parser::AccClause::Copyout &c) {
+  CheckAllowed(llvm::acc::Clause::ACCC_copyout);
+  const auto &modifierClause{c.v};
+  if (const auto &modifier{
+          std::get>(modifierClause.t)}) {
+    if (modifier->v != parser::AccDataModifier::Modifier::Zero) {
+      context_.Say(GetContext().clauseSource,
+          "Only the ZERO modifier is allowed for the %s clause "
+          "on the %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(llvm::acc::Clause::ACCC_copyout)
+                  .str()),
+          ContextDirectiveAsFortran());
+    }
+  }
+}
+
+// Validate that `clause` may appear on the directive currently on top of the
+// context stack. Diagnoses and returns early when:
+//   1. the clause is in none of the directive's allowed / allowed-once /
+//      allowed-exclusive / required sets;
+//   2. the clause is limited to one occurrence (allowed-once or exclusive)
+//      and has already been recorded on this directive;
+//   3. the clause is exclusive and another exclusive clause is present.
+// On success, records the clause in the context's clauseInfo map and in the
+// ordered actualClauses list.
+void AccStructureChecker::CheckAllowed(llvm::acc::Clause clause) {
+  if (!GetContext().allowedClauses.test(clause) &&
+      !GetContext().allowedOnceClauses.test(clause) &&
+      !GetContext().allowedExclusiveClauses.test(clause) &&
+      !GetContext().requiredClauses.test(clause)) {
+    context_.Say(GetContext().clauseSource,
+        "%s clause is not allowed on the %s directive"_err_en_US,
+        parser::ToUpperCaseLetters(
+            llvm::acc::getOpenACCClauseName(clause).str()),
+        parser::ToUpperCaseLetters(GetContext().directiveSource.ToString()));
+    return;
+  }
+  if ((GetContext().allowedOnceClauses.test(clause) ||
+          GetContext().allowedExclusiveClauses.test(clause)) &&
+      FindClause(clause)) {
+    context_.Say(GetContext().clauseSource,
+        "At most one %s clause can appear on the %s directive"_err_en_US,
+        parser::ToUpperCaseLetters(
+            llvm::acc::getOpenACCClauseName(clause).str()),
+        parser::ToUpperCaseLetters(GetContext().directiveSource.ToString()));
+    return;
+  }
+  if (GetContext().allowedExclusiveClauses.test(clause)) {
+    // Collect every already-seen clause from the exclusive set; each one
+    // conflicts with the current clause and gets its own diagnostic.
+    std::vector others;
+    GetContext().allowedExclusiveClauses.IterateOverMembers(
+        [&](llvm::acc::Clause o) {
+          if (FindClause(o)) {
+            others.emplace_back(o);
+          }
+        });
+    for (const auto &e : others) {
+      context_.Say(GetContext().clauseSource,
+          "%s and %s clauses are mutually exclusive and may not appear on the "
+          "same %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(clause).str()),
+          parser::ToUpperCaseLetters(llvm::acc::getOpenACCClauseName(e).str()),
+          parser::ToUpperCaseLetters(GetContext().directiveSource.ToString()));
+    }
+    // Do not record a clause that conflicted with an existing one.
+    if (!others.empty()) {
+      return;
+    }
+  }
+  SetContextClauseInfo(clause);
+  AddClauseToCrtContext(clause);
+}
+
+// Enforce that once `clause` has appeared on the current directive, every
+// clause following it (in source order, as recorded in actualClauses)
+// belongs to `set`. Clauses appearing before `clause` are unrestricted; if
+// `clause` never appears, no check is performed.
+void AccStructureChecker::CheckOnlyAllowedAfter(
+    llvm::acc::Clause clause, AccClauseSet set) {
+  bool enforceCheck = false;
+  for (auto cl : GetContext().actualClauses) {
+    if (cl == clause) {
+      // From here on, subsequent clauses must be members of `set`.
+      enforceCheck = true;
+      continue;
+    } else if (enforceCheck && !set.test(cl)) {
+      // Report at the offending clause's own source location.
+      auto parserClause = GetContext().clauseInfo.find(cl);
+      context_.Say(parserClause->second->source,
+          "Clause %s is not allowed after clause %s on the %s "
+          "directive"_err_en_US,
+          parser::ToUpperCaseLetters(llvm::acc::getOpenACCClauseName(cl).str()),
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(clause).str()),
+          ContextDirectiveAsFortran());
+    }
+  }
+}
+
+// Verify that at least one clause from the current directive's
+// required-one-of set was actually specified; otherwise emit a diagnostic
+// listing the acceptable clauses.
+void AccStructureChecker::CheckRequireAtLeastOneOf() {
+  for (auto cl : GetContext().actualClauses) {
+    if (GetContext().requiredClauses.test(cl))
+      return;
+  }
+  // No clause matched in the actual clauses list
+  context_.Say(GetContext().directiveSource,
+      "At least one of %s clause must appear on the %s directive"_err_en_US,
+      ClauseSetToString(GetContext().requiredClauses),
+      ContextDirectiveAsFortran());
+}
+
+// Diagnose a directive that was written with no clauses at all, for
+// directives that require at least one (any) clause.
+void AccStructureChecker::CheckAtLeastOneClause() {
+  if (GetContext().actualClauses.empty()) {
+    context_.Say(GetContext().directiveSource,
+        "At least one clause is required on the %s directive"_err_en_US,
+        ContextDirectiveAsFortran());
+  }
+}
+
+// Enforce restriction where clauses in the given set are not allowed if the
+// given clause appears.
+// E.g. on LOOP, SEQ forbids GANG/WORKER/VECTOR. One diagnostic is emitted
+// per conflicting clause found.
+void AccStructureChecker::CheckNotAllowedIfClause(
+    llvm::acc::Clause clause, AccClauseSet set) {
+  // Nothing to enforce unless the triggering clause was actually specified.
+  if (std::find(GetContext().actualClauses.begin(),
+          GetContext().actualClauses.end(),
+          clause) == GetContext().actualClauses.end()) {
+    return; // Clause is not present
+  }
+
+  for (auto cl : GetContext().actualClauses) {
+    if (set.test(cl)) {
+      context_.Say(GetContext().directiveSource,
+          "Clause %s is not allowed if clause %s appears on the %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(llvm::acc::getOpenACCClauseName(cl).str()),
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(clause).str()),
+          ContextDirectiveAsFortran());
+    }
+  }
+}
+
+// Require a clause's scalar-int-constant parameter (e.g. COLLAPSE(n)) to be
+// strictly positive. If the expression's value cannot be computed here
+// (GetIntValue returns no value), no diagnostic is emitted.
+void AccStructureChecker::RequiresConstantPositiveParameter(
+    const llvm::acc::Clause &clause, const parser::ScalarIntConstantExpr &i) {
+  if (const auto v{GetIntValue(i)}) {
+    if (*v <= 0) {
+      context_.Say(GetContext().clauseSource,
+          "The parameter of the %s clause on the %s directive must be "
+          "a constant positive integer expression"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::acc::getOpenACCClauseName(clause).str()),
+          ContextDirectiveAsFortran());
+    }
+  }
+}
+
+// Same as RequiresConstantPositiveParameter, but for clauses whose parameter
+// is optional: the check only runs when the parameter was supplied.
+void AccStructureChecker::OptionalConstantPositiveParameter(
+    const llvm::acc::Clause &clause,
+    const std::optional &o) {
+  if (o != std::nullopt) {
+    RequiresConstantPositiveParameter(clause, o.value());
+  }
+}
+
+// Render a clause set as a comma-separated list of upper-case clause names,
+// for use in diagnostics (e.g. "At least one of X, Y, Z clause...").
+std::string AccStructureChecker::ClauseSetToString(const AccClauseSet set) {
+  std::string list;
+  set.IterateOverMembers([&](llvm::acc::Clause o) {
+    if (!list.empty())
+      list.append(", ");
+    list.append(
+        parser::ToUpperCaseLetters(llvm::acc::getOpenACCClauseName(o).str()));
+  });
+  return list;
+}
+
+// Report a begin/end directive mismatch: the error is anchored at the end
+// directive, with an attached note pointing back at the begin directive.
+void AccStructureChecker::SayNotMatching(
+    const parser::CharBlock &beginSource, const parser::CharBlock &endSource) {
+  context_
+      .Say(endSource, "Unmatched %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(endSource.ToString()))
+      .Attach(beginSource, "Does not match directive"_en_US);
+}
+
+} // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/check-acc-structure.h b/flang/lib/Semantics/check-acc-structure.h
new file mode 100644
index 0000000000000..fef12383952db
--- /dev/null
+++ b/flang/lib/Semantics/check-acc-structure.h
@@ -0,0 +1,204 @@
+//===-- lib/Semantics/check-acc-structure.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// OpenACC structure validity check list
+//    1. invalid clauses on directive
+//    2. invalid repeated clauses on directive
+//    3. invalid nesting of regions
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_SEMANTICS_CHECK_ACC_STRUCTURE_H_
+#define FORTRAN_SEMANTICS_CHECK_ACC_STRUCTURE_H_
+
+#include "flang/Common/enum-set.h"
+#include "flang/Parser/parse-tree.h"
+#include "flang/Semantics/semantics.h"
+#include "llvm/Frontend/OpenACC/ACC.h.inc"
+
+#include 
+
+using AccDirectiveSet = Fortran::common::EnumSet;
+
+using AccClauseSet =
+    Fortran::common::EnumSet;
+
+#define GEN_FLANG_DIRECTIVE_CLAUSE_SETS
+#include "llvm/Frontend/OpenACC/ACC.cpp.inc"
+
+namespace Fortran::semantics {
+
+// Semantic checker for OpenACC directive/clause structure. Driven by the
+// parse-tree visitor: Enter() on a construct pushes a per-directive context
+// (with clause sets generated from ACC.td), Enter() on each clause validates
+// and records it, and Leave() on the construct runs whole-directive checks
+// before popping the context.
+// NOTE(review): several template-argument lists in this chunk appear
+// stripped by text extraction (EnumSet aliases, std::multimap, std::list,
+// std::optional, std::vector below) — verify against the original source.
+class AccStructureChecker : public virtual BaseChecker {
+public:
+  AccStructureChecker(SemanticsContext &context) : context_{context} {}
+
+  // Construct and directives
+  void Enter(const parser::OpenACCBlockConstruct &);
+  void Leave(const parser::OpenACCBlockConstruct &);
+  void Enter(const parser::OpenACCCombinedConstruct &);
+  void Leave(const parser::OpenACCCombinedConstruct &);
+  void Enter(const parser::OpenACCLoopConstruct &);
+  void Leave(const parser::OpenACCLoopConstruct &);
+  void Enter(const parser::OpenACCRoutineConstruct &);
+  void Leave(const parser::OpenACCRoutineConstruct &);
+  void Enter(const parser::OpenACCStandaloneConstruct &);
+  void Leave(const parser::OpenACCStandaloneConstruct &);
+  void Enter(const parser::OpenACCStandaloneDeclarativeConstruct &);
+  void Leave(const parser::OpenACCStandaloneDeclarativeConstruct &);
+
+  // Clauses
+  void Leave(const parser::AccClauseList &);
+  void Enter(const parser::AccClause &);
+
+  // One Enter() per clause kind; most are generated by CHECK_SIMPLE_CLAUSE
+  // in the .cpp and simply call CheckAllowed().
+  void Enter(const parser::AccClause::Auto &);
+  void Enter(const parser::AccClause::Async &);
+  void Enter(const parser::AccClause::Attach &);
+  void Enter(const parser::AccClause::Bind &);
+  void Enter(const parser::AccClause::Capture &);
+  void Enter(const parser::AccClause::Create &);
+  void Enter(const parser::AccClause::Collapse &);
+  void Enter(const parser::AccClause::Copy &);
+  void Enter(const parser::AccClause::Copyin &);
+  void Enter(const parser::AccClause::Copyout &);
+  void Enter(const parser::AccClause::Default &);
+  void Enter(const parser::AccClause::DefaultAsync &);
+  void Enter(const parser::AccClause::Delete &);
+  void Enter(const parser::AccClause::Detach &);
+  void Enter(const parser::AccClause::Device &);
+  void Enter(const parser::AccClause::DeviceNum &);
+  void Enter(const parser::AccClause::DevicePtr &);
+  void Enter(const parser::AccClause::DeviceResident &);
+  void Enter(const parser::AccClause::DeviceType &);
+  void Enter(const parser::AccClause::Finalize &);
+  void Enter(const parser::AccClause::FirstPrivate &);
+  void Enter(const parser::AccClause::Gang &);
+  void Enter(const parser::AccClause::Host &);
+  void Enter(const parser::AccClause::If &);
+  void Enter(const parser::AccClause::IfPresent &);
+  void Enter(const parser::AccClause::Independent &);
+  void Enter(const parser::AccClause::Link &);
+  void Enter(const parser::AccClause::NoCreate &);
+  void Enter(const parser::AccClause::NoHost &);
+  void Enter(const parser::AccClause::NumGangs &);
+  void Enter(const parser::AccClause::NumWorkers &);
+  void Enter(const parser::AccClause::Present &);
+  void Enter(const parser::AccClause::Private &);
+  void Enter(const parser::AccClause::Read &);
+  void Enter(const parser::AccClause::Reduction &);
+  void Enter(const parser::AccClause::Self &);
+  void Enter(const parser::AccClause::Seq &);
+  void Enter(const parser::AccClause::Tile &);
+  void Enter(const parser::AccClause::UseDevice &);
+  void Enter(const parser::AccClause::Vector &);
+  void Enter(const parser::AccClause::VectorLength &);
+  void Enter(const parser::AccClause::Wait &);
+  void Enter(const parser::AccClause::Worker &);
+  void Enter(const parser::AccClause::Write &);
+
+private:
+// Pulls in the TableGen-generated directive -> clause-sets table
+// (directiveClausesTable) used by SetClauseSets() below.
+#define GEN_FLANG_DIRECTIVE_CLAUSE_MAP
+#include "llvm/Frontend/OpenACC/ACC.cpp.inc"
+
+  // Per-directive state pushed on Enter() and popped on Leave(): the
+  // directive's source/kind, its clause sets, and the clauses seen so far.
+  struct AccContext {
+    AccContext(parser::CharBlock source, llvm::acc::Directive d)
+        : directiveSource{source}, directive{d} {}
+
+    parser::CharBlock directiveSource{nullptr};
+    parser::CharBlock clauseSource{nullptr};
+    llvm::acc::Directive directive;
+    AccClauseSet allowedClauses{};
+    AccClauseSet allowedOnceClauses{};
+    AccClauseSet allowedExclusiveClauses{};
+    AccClauseSet requiredClauses{};
+
+    const parser::AccClause *clause{nullptr};
+    std::multimap clauseInfo;
+    std::list actualClauses;
+  };
+
+  // back() is the top of the stack
+  AccContext &GetContext() {
+    CHECK(!accContext_.empty());
+    return accContext_.back();
+  }
+
+  // Record the clause currently being visited so diagnostics can point at it.
+  void SetContextClause(const parser::AccClause &clause) {
+    GetContext().clauseSource = clause.source;
+    GetContext().clause = &clause;
+  }
+
+  // Map the clause kind to its parse-tree node (used by FindClause and by
+  // diagnostics that need the clause's own source location).
+  void SetContextClauseInfo(llvm::acc::Clause type) {
+    GetContext().clauseInfo.emplace(type, GetContext().clause);
+  }
+
+  // Append to the ordered list of clauses seen on the current directive.
+  void AddClauseToCrtContext(llvm::acc::Clause type) {
+    GetContext().actualClauses.push_back(type);
+  }
+
+  // Return the parse-tree node of a previously seen clause of this kind on
+  // the current directive, or nullptr if none was recorded.
+  const parser::AccClause *FindClause(llvm::acc::Clause type) {
+    auto it{GetContext().clauseInfo.find(type)};
+    if (it != GetContext().clauseInfo.end()) {
+      return it->second;
+    }
+    return nullptr;
+  }
+
+  void PushContext(const parser::CharBlock &source, llvm::acc::Directive dir) {
+    accContext_.emplace_back(source, dir);
+  }
+
+  // Load the four clause sets for `dir` from the generated table into the
+  // top-of-stack context.
+  void SetClauseSets(llvm::acc::Directive dir) {
+    accContext_.back().allowedClauses = directiveClausesTable[dir].allowed;
+    accContext_.back().allowedOnceClauses =
+        directiveClausesTable[dir].allowedOnce;
+    accContext_.back().allowedExclusiveClauses =
+        directiveClausesTable[dir].allowedExclusive;
+    accContext_.back().requiredClauses =
+        directiveClausesTable[dir].requiredOneOf;
+  }
+  void PushContextAndClauseSets(
+      const parser::CharBlock &source, llvm::acc::Directive dir) {
+    PushContext(source, dir);
+    SetClauseSets(dir);
+  }
+
+  void SayNotMatching(const parser::CharBlock &, const parser::CharBlock &);
+
+  // Diagnose when a construct's begin and end directives disagree.
+  template  void CheckMatching(const B &beginDir, const B &endDir) {
+    const auto &begin{beginDir.v};
+    const auto &end{endDir.v};
+    if (begin != end) {
+      SayNotMatching(beginDir.source, endDir.source);
+    }
+  }
+
+  // Check that only clauses in set are after the specific clauses.
+  void CheckOnlyAllowedAfter(llvm::acc::Clause clause, AccClauseSet set);
+  void CheckRequireAtLeastOneOf();
+  void CheckAllowed(llvm::acc::Clause clause);
+  void CheckAtLeastOneClause();
+  void CheckNotAllowedIfClause(llvm::acc::Clause clause, AccClauseSet set);
+  std::string ContextDirectiveAsFortran();
+
+  void CheckNoBranching(const parser::Block &block,
+      const llvm::acc::Directive directive,
+      const parser::CharBlock &directiveSource) const;
+
+  void RequiresConstantPositiveParameter(
+      const llvm::acc::Clause &clause, const parser::ScalarIntConstantExpr &i);
+  void OptionalConstantPositiveParameter(const llvm::acc::Clause &clause,
+      const std::optional &o);
+
+  SemanticsContext &context_;
+  std::vector accContext_; // used as a stack
+
+  std::string ClauseSetToString(const AccClauseSet set);
+};
+
+} // namespace Fortran::semantics
+
+#endif // FORTRAN_SEMANTICS_CHECK_ACC_STRUCTURE_H_
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index c98f7a542be78..a1f5be231fd9a 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -45,26 +45,8 @@ class CheckHelper {
 
 private:
   template  void CheckSpecExpr(const A &x) {
-    if (symbolBeingChecked_ && IsSaved(*symbolBeingChecked_)) {
-      if (!evaluate::IsConstantExpr(x)) {
-        messages_.Say(
-            "Specification expression must be constant in declaration of '%s' with the SAVE attribute"_err_en_US,
-            symbolBeingChecked_->name());
-      }
-    } else {
-      evaluate::CheckSpecificationExpr(
-          x, messages_, DEREF(scope_), context_.intrinsics());
-    }
-  }
-  template  void CheckSpecExpr(const std::optional &x) {
-    if (x) {
-      CheckSpecExpr(*x);
-    }
-  }
-  template  void CheckSpecExpr(A &x) {
-    x = Fold(foldingContext_, std::move(x));
-    const A &constx{x};
-    CheckSpecExpr(constx);
+    evaluate::CheckSpecificationExpr(
+        x, messages_, DEREF(scope_), context_.intrinsics());
   }
   void CheckValue(const Symbol &, const DerivedTypeSpec *);
   void CheckVolatile(
@@ -120,7 +102,6 @@ class CheckHelper {
   // This symbol is the one attached to the innermost enclosing scope
   // that has a symbol.
   const Symbol *innermostSymbol_{nullptr};
-  const Symbol *symbolBeingChecked_{nullptr};
 };
 
 void CheckHelper::Check(const ParamValue &value, bool canBeAssumed) {
@@ -295,6 +276,12 @@ void CheckHelper::Check(const Symbol &symbol) {
     messages_.Say(
         "A CONTIGUOUS component must be an array with the POINTER attribute"_err_en_US);
   }
+  if (symbol.owner().IsModule() && IsAutomatic(symbol)) {
+    messages_.Say(
+        "Automatic data object '%s' may not appear in the specification part"
+        " of a module"_err_en_US,
+        symbol.name());
+  }
 }
 
 void CheckHelper::CheckValue(
@@ -388,13 +375,10 @@ void CheckHelper::CheckAssumedTypeEntity( // C709
 
 void CheckHelper::CheckObjectEntity(
     const Symbol &symbol, const ObjectEntityDetails &details) {
-  CHECK(!symbolBeingChecked_);
-  symbolBeingChecked_ = &symbol; // for specification expr checks
   CheckArraySpec(symbol, details.shape());
   Check(details.shape());
   Check(details.coshape());
   CheckAssumedTypeEntity(symbol, details);
-  symbolBeingChecked_ = nullptr;
   if (!details.coshape().empty()) {
     bool isDeferredShape{details.coshape().IsDeferredShape()};
     if (IsAllocatable(symbol)) {
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index b4e86faffe195..d857d36ed05d9 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -9,62 +9,9 @@
 #include "check-omp-structure.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/tools.h"
-#include 
 
 namespace Fortran::semantics {
 
-static OmpClauseSet doAllowedClauses{llvm::omp::Clause::OMPC_private,
-    llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_lastprivate,
-    llvm::omp::Clause::OMPC_linear, llvm::omp::Clause::OMPC_reduction};
-static OmpClauseSet doAllowedOnceClauses{llvm::omp::Clause::OMPC_schedule,
-    llvm::omp::Clause::OMPC_collapse, llvm::omp::Clause::OMPC_ordered};
-
-static OmpClauseSet simdAllowedClauses{llvm::omp::Clause::OMPC_linear,
-    llvm::omp::Clause::OMPC_aligned, llvm::omp::Clause::OMPC_private,
-    llvm::omp::Clause::OMPC_lastprivate, llvm::omp::Clause::OMPC_reduction};
-static OmpClauseSet simdAllowedOnceClauses{llvm::omp::Clause::OMPC_collapse,
-    llvm::omp::Clause::OMPC_safelen, llvm::omp::Clause::OMPC_simdlen};
-
-static OmpClauseSet parallelAllowedClauses{llvm::omp::Clause::OMPC_default,
-    llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate,
-    llvm::omp::Clause::OMPC_shared, llvm::omp::Clause::OMPC_copyin,
-    llvm::omp::Clause::OMPC_reduction};
-static OmpClauseSet parallelAllowedOnceClauses{llvm::omp::Clause::OMPC_if,
-    llvm::omp::Clause::OMPC_num_threads, llvm::omp::Clause::OMPC_proc_bind};
-
-static OmpClauseSet taskloopAllowedClauses{llvm::omp::Clause::OMPC_shared,
-    llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate,
-    llvm::omp::Clause::OMPC_lastprivate, llvm::omp::Clause::OMPC_default,
-    llvm::omp::Clause::OMPC_untied, llvm::omp::Clause::OMPC_mergeable,
-    llvm::omp::Clause::OMPC_nogroup};
-static OmpClauseSet taskloopAllowedOnceClauses{llvm::omp::Clause::OMPC_collapse,
-    llvm::omp::Clause::OMPC_if, llvm::omp::Clause::OMPC_final,
-    llvm::omp::Clause::OMPC_priority};
-static OmpClauseSet taskloopAllowedExclusiveClauses{
-    llvm::omp::Clause::OMPC_grainsize, llvm::omp::Clause::OMPC_num_tasks};
-
-static OmpClauseSet distributeAllowedClauses{llvm::omp::Clause::OMPC_private,
-    llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_lastprivate};
-static OmpClauseSet distributeAllowedOnceClauses{
-    llvm::omp::Clause::OMPC_collapse, llvm::omp::Clause::OMPC_dist_schedule};
-
-static OmpClauseSet targetAllowedClauses{llvm::omp::Clause::OMPC_if,
-    llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate,
-    llvm::omp::Clause::OMPC_map, llvm::omp::Clause::OMPC_is_device_ptr,
-    llvm::omp::Clause::OMPC_depend};
-static OmpClauseSet targetAllowedOnceClauses{llvm::omp::Clause::OMPC_device,
-    llvm::omp::Clause::OMPC_defaultmap, llvm::omp::Clause::OMPC_nowait};
-
-static OmpClauseSet teamsAllowedClauses{llvm::omp::Clause::OMPC_private,
-    llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_shared,
-    llvm::omp::Clause::OMPC_reduction};
-static OmpClauseSet teamsAllowedOnceClauses{llvm::omp::Clause::OMPC_num_teams,
-    llvm::omp::Clause::OMPC_thread_limit, llvm::omp::Clause::OMPC_default};
-
-static OmpClauseSet sectionsAllowedClauses{llvm::omp::Clause::OMPC_private,
-    llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_lastprivate,
-    llvm::omp::Clause::OMPC_reduction};
-
 std::string OmpStructureChecker::ContextDirectiveAsFortran() {
   auto dir = llvm::omp::getOpenMPDirectiveName(GetContext().directive).str();
   std::transform(dir.begin(), dir.end(), dir.begin(),
@@ -186,19 +133,18 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) {
     CheckMatching(beginLoopDir, *endLoopDir);
   }
 
-  if (beginDir.v != llvm::omp::Directive::OMPD_do)
-    PushContext(beginDir.source, beginDir.v);
+  if (beginDir.v != llvm::omp::Directive::OMPD_do) {
+    PushContextAndClauseSets(beginDir.source, beginDir.v);
+  } else {
+    // 2.7.1 do-clause -> private-clause |
+    //                    firstprivate-clause |
+    //                    lastprivate-clause |
+    //                    linear-clause |
+    //                    reduction-clause |
+    //                    schedule-clause |
+    //                    collapse-clause |
+    //                    ordered-clause
 
-  switch (beginDir.v) {
-  // 2.7.1 do-clause -> private-clause |
-  //                    firstprivate-clause |
-  //                    lastprivate-clause |
-  //                    linear-clause |
-  //                    reduction-clause |
-  //                    schedule-clause |
-  //                    collapse-clause |
-  //                    ordered-clause
-  case llvm::omp::Directive::OMPD_do: {
     // nesting check
     HasInvalidWorksharingNesting(beginDir.source,
         {llvm::omp::Directive::OMPD_do, llvm::omp::Directive::OMPD_sections,
@@ -210,218 +156,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) {
             llvm::omp::Directive::OMPD_ordered,
             llvm::omp::Directive::OMPD_atomic,
             llvm::omp::Directive::OMPD_master});
-    PushContext(beginDir.source, llvm::omp::Directive::OMPD_do);
-    SetContextAllowed(doAllowedClauses);
-    SetContextAllowedOnce(doAllowedOnceClauses);
-  } break;
-
-  // 2.11.1 parallel-do-clause -> parallel-clause |
-  //                              do-clause
-  case llvm::omp::Directive::OMPD_parallel_do: {
-    SetContextAllowed(parallelAllowedClauses | doAllowedClauses);
-    SetContextAllowedOnce(parallelAllowedOnceClauses | doAllowedOnceClauses);
-  } break;
-
-  // 2.8.1 simd-clause -> safelen-clause |
-  //                      simdlen-clause |
-  //                      linear-clause |
-  //                      aligned-clause |
-  //                      private-clause |
-  //                      lastprivate-clause |
-  //                      reduction-clause |
-  //                      collapse-clause
-  case llvm::omp::Directive::OMPD_simd: {
-    SetContextAllowed(simdAllowedClauses);
-    SetContextAllowedOnce(simdAllowedOnceClauses);
-  } break;
-
-  // 2.8.3 do-simd-clause -> do-clause |
-  //                         simd-clause
-  case llvm::omp::Directive::OMPD_do_simd: {
-    SetContextAllowed(doAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(doAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  // 2.11.4 parallel-do-simd-clause -> parallel-clause |
-  //                                   do-simd-clause
-  case llvm::omp::Directive::OMPD_parallel_do_simd: {
-    SetContextAllowed(
-        parallelAllowedClauses | doAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(parallelAllowedOnceClauses | doAllowedOnceClauses |
-        simdAllowedOnceClauses);
-  } break;
-
-  // 2.9.2 taskloop-clause -> if-clause |
-  //                          shared-clause |
-  //                          private-clause |
-  //                          firstprivate-clause |
-  //                          lastprivate-clause |
-  //                          default-clause |
-  //                          grainsize-clause |
-  //                          num-tasks-clause |
-  //                          collapse-clause |
-  //                          final-clause |
-  //                          priority-clause |
-  //                          untied-clause |
-  //                          mergeable-clause |
-  //                          nogroup-clause
-  case llvm::omp::Directive::OMPD_taskloop: {
-    SetContextAllowed(taskloopAllowedClauses);
-    SetContextAllowedOnce(taskloopAllowedOnceClauses);
-    SetContextAllowedExclusive(taskloopAllowedExclusiveClauses);
-  } break;
-
-  // 2.9.3 taskloop-simd-clause -> taskloop-clause |
-  //                               simd-clause
-  case llvm::omp::Directive::OMPD_taskloop_simd: {
-    SetContextAllowed((taskloopAllowedClauses | simdAllowedClauses) -
-        llvm::omp::Clause::OMPC_reduction);
-    SetContextAllowedOnce(taskloopAllowedOnceClauses | simdAllowedOnceClauses);
-    SetContextAllowedExclusive(taskloopAllowedExclusiveClauses);
-  } break;
-
-  // 2.10.8 distribute-clause -> private-clause |
-  //                             firstprivate-clause |
-  //                             lastprivate-clause |
-  //                             collapse-clause |
-  //                             dist-schedule-clause
-  case llvm::omp::Directive::OMPD_distribute: {
-    SetContextAllowed(distributeAllowedClauses);
-    SetContextAllowedOnce(distributeAllowedOnceClauses);
-  } break;
-
-  // 2.10.9 distribute-simd-clause -> distribute-clause |
-  //                                  simd-clause
-  case llvm::omp::Directive::OMPD_distribute_simd: {
-    SetContextAllowed(distributeAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(
-        distributeAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  // 2.10.10 distribute-parallel-do-clause -> distribute-clause |
-  //                                          parallel-do-clause
-  case llvm::omp::Directive::OMPD_distribute_parallel_do: {
-    SetContextAllowed(
-        distributeAllowedClauses | parallelAllowedClauses | doAllowedClauses);
-    SetContextAllowedOnce(distributeAllowedOnceClauses |
-        parallelAllowedOnceClauses | doAllowedOnceClauses);
-  } break;
-
-  // 2.10.11 distribute-parallel-do-simd-clause -> distribute-clause |
-  //                                               parallel-do-simd-clause
-  case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: {
-    SetContextAllowed(distributeAllowedClauses | parallelAllowedClauses |
-        doAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(distributeAllowedOnceClauses |
-        parallelAllowedOnceClauses | doAllowedOnceClauses | simdAllowedClauses);
-  } break;
-
-  // 2.11.6 target-parallel-do-clause -> target-clause |
-  //                                     parallel-do-clause
-  case llvm::omp::Directive::OMPD_target_parallel_do: {
-    SetContextAllowed(
-        targetAllowedClauses | parallelAllowedClauses | doAllowedClauses);
-    SetContextAllowedOnce(
-        (targetAllowedOnceClauses | parallelAllowedOnceClauses |
-            doAllowedOnceClauses) -
-        llvm::omp::Clause::OMPC_nowait);
-  } break;
-
-  // 2.11.7 target-parallel-do-simd-clause -> target-clause |
-  //                                          parallel-do-simd-clause
-  case llvm::omp::Directive::OMPD_target_parallel_do_simd: {
-    SetContextAllowed(targetAllowedClauses | parallelAllowedClauses |
-        doAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(
-        (targetAllowedOnceClauses | parallelAllowedOnceClauses |
-            doAllowedOnceClauses | simdAllowedOnceClauses) -
-        llvm::omp::Clause::OMPC_nowait);
-  } break;
-
-  // 2.11.8 target-simd-clause -> target-clause |
-  //                              simd-clause
-  case llvm::omp::Directive::OMPD_target_simd: {
-    SetContextAllowed(targetAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(targetAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  // 2.11.10 teams-distribute-clause -> teams-clause |
-  //                                    distribute-clause
-  case llvm::omp::Directive::OMPD_teams_distribute: {
-    SetContextAllowed(teamsAllowedClauses | distributeAllowedClauses);
-    SetContextAllowedOnce(
-        teamsAllowedOnceClauses | distributeAllowedOnceClauses);
-  } break;
-
-  // 2.11.11 teams-distribute-simd-clause -> teams-clause |
-  //                                         distribute-simd-clause
-  case llvm::omp::Directive::OMPD_teams_distribute_simd: {
-    SetContextAllowed(
-        teamsAllowedClauses | distributeAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  // 2.11.12 target-teams-distribute-clause -> target-clause |
-  //                                           teams-distribute-clause
-  case llvm::omp::Directive::OMPD_target_teams_distribute: {
-    SetContextAllowed(
-        targetAllowedClauses | teamsAllowedClauses | distributeAllowedClauses);
-    SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses);
-  } break;
-
-  // 2.11.13 target-teams-distribute-simd-clause -> target-clause |
-  //                                                teams-distribute-simd-clause
-  case llvm::omp::Directive::OMPD_target_teams_distribute_simd: {
-    SetContextAllowed(targetAllowedClauses | teamsAllowedClauses |
-        distributeAllowedClauses | simdAllowedClauses);
-    SetContextAllowed(targetAllowedOnceClauses | teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  // 2.11.14 teams-distribute-parallel-do-clause -> teams-clause |
-  //                                                distribute-parallel-do-clause
-  case llvm::omp::Directive::OMPD_teams_distribute_parallel_do: {
-    SetContextAllowed(teamsAllowedClauses | distributeAllowedClauses |
-        parallelAllowedClauses | doAllowedClauses);
-    SetContextAllowedOnce(teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses | parallelAllowedOnceClauses |
-        doAllowedOnceClauses);
-  } break;
-
-  // 2.11.15 target-teams-distribute-parallel-do-clause -> target-clause |
-  //                                                       teams-distribute-parallel-do-clause
-  case llvm::omp::Directive::OMPD_target_teams_distribute_parallel_do: {
-    SetContextAllowed(targetAllowedClauses | teamsAllowedClauses |
-        distributeAllowedClauses | parallelAllowedClauses | doAllowedClauses);
-    SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses | parallelAllowedOnceClauses |
-        doAllowedOnceClauses);
-  } break;
-
-  // 2.11.16 teams-distribute-parallel-do-clause -> teams-clause |
-  //                                                distribute-parallel-do-simd-clause
-  case llvm::omp::Directive::OMPD_teams_distribute_parallel_do_simd: {
-    SetContextAllowed(teamsAllowedClauses | distributeAllowedClauses |
-        parallelAllowedClauses | doAllowedClauses | simdAllowedClauses);
-    SetContextAllowedOnce(teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses | parallelAllowedOnceClauses |
-        doAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  case llvm::omp::Directive::OMPD_target_teams_distribute_parallel_do_simd: {
-    SetContextAllowed(targetAllowedClauses | teamsAllowedClauses |
-        distributeAllowedClauses | parallelAllowedClauses | doAllowedClauses |
-        simdAllowedClauses);
-    SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses |
-        distributeAllowedOnceClauses | parallelAllowedOnceClauses |
-        doAllowedOnceClauses | simdAllowedOnceClauses);
-  } break;
-
-  default:
-    // TODO others
-    break;
+    PushContextAndClauseSets(beginDir.source, llvm::omp::Directive::OMPD_do);
   }
 }
 
@@ -436,12 +171,8 @@ void OmpStructureChecker::Enter(const parser::OmpEndLoopDirective &x) {
   // 2.7.1 end-do -> END DO [nowait-clause]
   // 2.8.3 end-do-simd -> END DO SIMD [nowait-clause]
   case llvm::omp::Directive::OMPD_do:
-    SetContextDirectiveEnum(llvm::omp::Directive::OMPD_end_do);
-    SetContextAllowed(OmpClauseSet{llvm::omp::Clause::OMPC_nowait});
-    break;
   case llvm::omp::Directive::OMPD_do_simd:
-    SetContextDirectiveEnum(llvm::omp::Directive::OMPD_end_do_simd);
-    SetContextAllowed(OmpClauseSet{llvm::omp::Clause::OMPC_nowait});
+    SetClauseSets(dir.v);
     break;
   default:
     // no clauses are allowed
@@ -455,112 +186,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) {
   const auto &beginDir{
       CheckMatching(beginBlockDir, endBlockDir)};
 
-  PushContext(beginDir.source, beginDir.v);
-  switch (beginDir.v) {
-  // 2.5 parallel-clause -> if-clause |
-  //                        num-threads-clause |
-  //                        default-clause |
-  //                        private-clause |
-  //                        firstprivate-clause |
-  //                        shared-clause |
-  //                        copyin-clause |
-  //                        reduction-clause |
-  //                        proc-bind-clause
-  case llvm::omp::Directive::OMPD_parallel: {
-    // reserve for nesting check
-    SetContextAllowed(parallelAllowedClauses);
-    SetContextAllowedOnce(parallelAllowedOnceClauses);
-  } break;
-  // 2.7.3 single-clause -> private-clause |
-  //                        firstprivate-clause
-  case llvm::omp::Directive::OMPD_single:
-    SetContextAllowed({llvm::omp::Clause::OMPC_private,
-        llvm::omp::Clause::OMPC_firstprivate});
-    break;
-  // 2.7.4 workshare (no clauses are allowed)
-  case llvm::omp::Directive::OMPD_workshare:
-    break;
-  // 2.11.3 parallel-workshare-clause -> parallel-clause
-  case llvm::omp::Directive::OMPD_parallel_workshare: {
-    SetContextAllowed(parallelAllowedClauses);
-    SetContextAllowedOnce(parallelAllowedOnceClauses);
-  } break;
-    // 2.9.1 task-clause -> if-clause |
-    //                      final-clause |
-    //                      untied-clause |
-    //                      default-clause |
-    //                      mergeable-clause |
-    //                      private-clause |
-    //                      firstprivate-clause |
-    //                      shared-clause |
-    //                      depend-clause |
-    //                      priority-clause
-  case llvm::omp::Directive::OMPD_task: {
-    OmpClauseSet allowed{llvm::omp::Clause::OMPC_untied,
-        llvm::omp::Clause::OMPC_default, llvm::omp::Clause::OMPC_mergeable,
-        llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate,
-        llvm::omp::Clause::OMPC_shared, llvm::omp::Clause::OMPC_depend};
-    SetContextAllowed(allowed);
-    OmpClauseSet allowedOnce{llvm::omp::Clause::OMPC_if,
-        llvm::omp::Clause::OMPC_final, llvm::omp::Clause::OMPC_priority};
-    SetContextAllowedOnce(allowedOnce);
-  } break;
-  // 2.10.4 target-clause -> if-clause |
-  //                         device-clause |
-  //                         private-clause |
-  //                         firstprivate-clause |
-  //                         map-clause |
-  //                         is-device-ptr-clause |
-  //                         defaultmap-clause |
-  //                         nowait-clause |
-  //                         depend-clause
-  case llvm::omp::Directive::OMPD_target: {
-    SetContextAllowed(targetAllowedClauses);
-    SetContextAllowedOnce(targetAllowedOnceClauses);
-  } break;
-  // 2.10.7 teams-clause -> num-teams-clause |
-  //                        thread-limit-clause |
-  //                        default-clause |
-  //                        private-clause |
-  //                        firstprivate-clause |
-  //                        shared-clause |
-  //                        reduction-clause
-  case llvm::omp::Directive::OMPD_teams: {
-    SetContextAllowed(teamsAllowedClauses);
-    SetContextAllowedOnce(teamsAllowedOnceClauses);
-  } break;
-  // 2.11.9 target-teams -> target-clause |
-  //                        teams-clause
-  case llvm::omp::Directive::OMPD_target_teams: {
-    SetContextAllowed(targetAllowedClauses | teamsAllowedClauses);
-    SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses);
-  } break;
-  // 2.10.1 target-data-clause -> if-clause |
-  //                              device-clause |
-  //                              map-clause |
-  //                              use-device-ptr-clause
-  case llvm::omp::Directive::OMPD_target_data: {
-    OmpClauseSet allowed{llvm::omp::Clause::OMPC_if,
-        llvm::omp::Clause::OMPC_map, llvm::omp::Clause::OMPC_use_device_ptr};
-    SetContextAllowed(allowed);
-    SetContextAllowedOnce({llvm::omp::Clause::OMPC_device});
-    SetContextRequired({llvm::omp::Clause::OMPC_map});
-  } break;
-  // 2.13.1 master (no clauses are allowed)
-  case llvm::omp::Directive::OMPD_master:
-    break;
-  // 2.11.5 target-parallel-clause -> target-clause |
-  //                                  parallel-clause
-  case llvm::omp::Directive::OMPD_target_parallel: {
-    SetContextAllowed((targetAllowedClauses | parallelAllowedClauses) -
-        llvm::omp::Clause::OMPC_copyin);
-    SetContextAllowedOnce(
-        targetAllowedOnceClauses | parallelAllowedOnceClauses);
-  } break;
-  default:
-    // TODO others
-    break;
-  }
+  PushContextAndClauseSets(beginDir.source, beginDir.v);
 }
 
 void OmpStructureChecker::Leave(const parser::OpenMPBlockConstruct &) {
@@ -574,25 +200,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPSectionsConstruct &x) {
   const auto &beginDir{CheckMatching(
       beginSectionsDir, endSectionsDir)};
 
-  PushContext(beginDir.source, beginDir.v);
-  switch (beginDir.v) {
-  // 2.7.2 sections-clause -> private-clause |
-  //                          firstprivate-clause |
-  //                          lastprivate-clause |
-  //                          reduction-clause
-  case llvm::omp::Directive::OMPD_sections: {
-    SetContextAllowed(sectionsAllowedClauses);
-  } break;
-    // 2.11.2 -> parallel-sections-clause -> parallel-clause |
-    //                                       sections-clause
-  case llvm::omp::Directive::OMPD_parallel_sections: {
-    SetContextAllowed(parallelAllowedClauses | sectionsAllowedClauses);
-    SetContextAllowedOnce(parallelAllowedOnceClauses);
-  } break;
-  default:
-    // TODO others
-    break;
-  }
+  PushContextAndClauseSets(beginDir.source, beginDir.v);
 }
 
 void OmpStructureChecker::Leave(const parser::OpenMPSectionsConstruct &) {
@@ -616,19 +224,7 @@ void OmpStructureChecker::Enter(const parser::OmpEndSectionsDirective &x) {
 
 void OmpStructureChecker::Enter(const parser::OpenMPDeclareSimdConstruct &x) {
   const auto &dir{std::get(x.t)};
-  PushContext(dir.source, llvm::omp::Directive::OMPD_declare_simd);
-  // 2.8.2 declare-simd-clause -> simdlen-clause |
-  //                              linear-clause |
-  //                              aligned-clause |
-  //                              uniform-clause |
-  //                              inbranch-clause |
-  //                              notinbranch-clause
-  OmpClauseSet allowed{llvm::omp::Clause::OMPC_linear,
-      llvm::omp::Clause::OMPC_aligned, llvm::omp::Clause::OMPC_uniform};
-  SetContextAllowed(allowed);
-  SetContextAllowedOnce({llvm::omp::Clause::OMPC_simdlen});
-  SetContextAllowedExclusive(
-      {llvm::omp::Clause::OMPC_inbranch, llvm::omp::Clause::OMPC_notinbranch});
+  PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_declare_simd);
 }
 
 void OmpStructureChecker::Leave(const parser::OpenMPDeclareSimdConstruct &) {
@@ -652,57 +248,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &) {
 void OmpStructureChecker::Enter(
     const parser::OpenMPSimpleStandaloneConstruct &x) {
   const auto &dir{std::get(x.t)};
-  PushContext(dir.source, dir.v);
-  switch (dir.v) {
-  case llvm::omp::Directive::OMPD_barrier: {
-    // 2.13.3 barrier
-  } break;
-  case llvm::omp::Directive::OMPD_taskwait: {
-    // 2.13.4 taskwait
-  } break;
-  case llvm::omp::Directive::OMPD_taskyield: {
-    // 2.9.4 taskyield
-  } break;
-  case llvm::omp::Directive::OMPD_target_enter_data: {
-    // 2.10.2 target-enter-data-clause -> if-clause |
-    //                                    device-clause |
-    //                                    map-clause |
-    //                                    depend-clause |
-    //                                    nowait-clause
-    OmpClauseSet allowed{llvm::omp::Clause::OMPC_map,
-        llvm::omp::Clause::OMPC_depend, llvm::omp::Clause::OMPC_nowait};
-    SetContextAllowed(allowed);
-    OmpClauseSet allowedOnce{
-        llvm::omp::Clause::OMPC_device, llvm::omp::Clause::OMPC_if};
-    SetContextAllowedOnce(allowedOnce);
-    SetContextRequired({llvm::omp::Clause::OMPC_map});
-  } break;
-  case llvm::omp::Directive::OMPD_target_exit_data: {
-    // 2.10.3  target-enter-data-clause -> if-clause |
-    //                                     device-clause |
-    //                                     map-clause |
-    //                                     depend-clause |
-    //                                     nowait-clause
-    OmpClauseSet allowed{llvm::omp::Clause::OMPC_map,
-        llvm::omp::Clause::OMPC_depend, llvm::omp::Clause::OMPC_nowait};
-    SetContextAllowed(allowed);
-    OmpClauseSet allowedOnce{
-        llvm::omp::Clause::OMPC_device, llvm::omp::Clause::OMPC_if};
-    SetContextAllowedOnce(allowedOnce);
-    SetContextRequired({llvm::omp::Clause::OMPC_map});
-  } break;
-  case llvm::omp::Directive::OMPD_target_update: {
-    // 2.10.5 target-update
-  } break;
-  case llvm::omp::Directive::OMPD_ordered: {
-    // 2.13.8 ordered-construct-clause -> depend-clause
-    OmpClauseSet allowed{llvm::omp::Clause::OMPC_depend};
-    SetContextAllowed(allowed);
-  } break;
-  default:
-    // TODO others
-    break;
-  }
+  PushContextAndClauseSets(dir.source, dir.v);
 }
 
 void OmpStructureChecker::Leave(
@@ -712,7 +258,7 @@ void OmpStructureChecker::Leave(
 
 void OmpStructureChecker::Enter(const parser::OpenMPFlushConstruct &x) {
   const auto &dir{std::get(x.t)};
-  PushContext(dir.source, llvm::omp::Directive::OMPD_flush);
+  PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_flush);
 }
 
 void OmpStructureChecker::Leave(const parser::OpenMPFlushConstruct &) {
@@ -721,7 +267,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPFlushConstruct &) {
 
 void OmpStructureChecker::Enter(const parser::OpenMPCancelConstruct &x) {
   const auto &dir{std::get(x.t)};
-  PushContext(dir.source, llvm::omp::Directive::OMPD_cancel);
+  PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_cancel);
 }
 
 void OmpStructureChecker::Leave(const parser::OpenMPCancelConstruct &) {
@@ -731,7 +277,8 @@ void OmpStructureChecker::Leave(const parser::OpenMPCancelConstruct &) {
 void OmpStructureChecker::Enter(
     const parser::OpenMPCancellationPointConstruct &x) {
   const auto &dir{std::get(x.t)};
-  PushContext(dir.source, llvm::omp::Directive::OMPD_cancellation_point);
+  PushContextAndClauseSets(
+      dir.source, llvm::omp::Directive::OMPD_cancellation_point);
 }
 
 void OmpStructureChecker::Leave(
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index 1585b0c861add..7fe78a792f19b 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -19,12 +19,17 @@
 #include "flang/Semantics/semantics.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
+#include 
+
 using OmpDirectiveSet = Fortran::common::EnumSet;
 
 using OmpClauseSet =
     Fortran::common::EnumSet;
 
+#define GEN_FLANG_DIRECTIVE_CLAUSE_SETS
+#include "llvm/Frontend/OpenMP/OMP.cpp.inc"
+
 namespace llvm {
 namespace omp {
 static OmpDirectiveSet parallelSet{Directive::OMPD_distribute_parallel_do,
@@ -151,6 +156,9 @@ class OmpStructureChecker : public virtual BaseChecker {
   void Enter(const parser::OmpScheduleClause &);
 
 private:
+#define GEN_FLANG_DIRECTIVE_CLAUSE_MAP
+#include "llvm/Frontend/OpenMP/OMP.cpp.inc"
+
   struct OmpContext {
     OmpContext(parser::CharBlock source, llvm::omp::Directive d)
         : directiveSource{source}, directive{d} {}
@@ -216,7 +224,20 @@ class OmpStructureChecker : public virtual BaseChecker {
   void PushContext(const parser::CharBlock &source, llvm::omp::Directive dir) {
     ompContext_.emplace_back(source, dir);
   }
-
+  void SetClauseSets(llvm::omp::Directive dir) {
+    ompContext_.back().allowedClauses = directiveClausesTable[dir].allowed;
+    ompContext_.back().allowedOnceClauses =
+        directiveClausesTable[dir].allowedOnce;
+    ompContext_.back().allowedExclusiveClauses =
+        directiveClausesTable[dir].allowedExclusive;
+    ompContext_.back().requiredClauses =
+        directiveClausesTable[dir].requiredOneOf;
+  }
+  void PushContextAndClauseSets(
+      const parser::CharBlock &source, llvm::omp::Directive dir) {
+    PushContext(source, dir);
+    SetClauseSets(dir);
+  }
   void RequiresConstantPositiveParameter(
       const llvm::omp::Clause &clause, const parser::ScalarIntConstantExpr &i);
   void RequiresPositiveParameter(
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index f22f8a9669246..4cb43a91ef09d 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -1159,8 +1159,12 @@ class ArrayConstructorContext {
   template  Result Test() {
     if (type_ && type_->category() == T::category) {
       if constexpr (T::category == TypeCategory::Derived) {
-        return AsMaybeExpr(ArrayConstructor{
-            type_->GetDerivedTypeSpec(), MakeSpecific(std::move(values_))});
+        if (type_->IsUnlimitedPolymorphic()) {
+          return std::nullopt;
+        } else {
+          return AsMaybeExpr(ArrayConstructor{type_->GetDerivedTypeSpec(),
+              MakeSpecific(std::move(values_))});
+        }
       } else if (type_->kind() == T::kind) {
         if constexpr (T::category == TypeCategory::Character) {
           if (auto len{type_->LEN()}) {
@@ -1295,6 +1299,13 @@ void ArrayConstructorContext::Add(const parser::AcValue &x) {
             auto restorer{exprAnalyzer_.GetContextualMessages().SetLocation(
                 expr.value().source)};
             if (MaybeExpr v{exprAnalyzer_.Analyze(expr.value())}) {
+              if (auto exprType{v->GetType()}) {
+                if (exprType->IsUnlimitedPolymorphic()) {
+                  exprAnalyzer_.Say(
+                      "Cannot have an unlimited polymorphic value in an "
+                      "array constructor"_err_en_US);
+                }
+              }
               Push(std::move(*v));
             }
           },
diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp
index 2f813d95f26b0..6fa59f0a82a08 100644
--- a/flang/lib/Semantics/mod-file.cpp
+++ b/flang/lib/Semantics/mod-file.cpp
@@ -8,6 +8,7 @@
 
 #include "mod-file.h"
 #include "resolve-names.h"
+#include "flang/Common/restorer.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Parser/message.h"
 #include "flang/Parser/parsing.h"
@@ -99,6 +100,9 @@ class SubprogramSymbolCollector {
 };
 
 bool ModFileWriter::WriteAll() {
+  // this flag affects character literals: force it to be consistent
+  auto restorer{
+      common::ScopedSet(parser::useHexadecimalEscapeSequences, false)};
   WriteAll(context_.globalScope());
   return !context_.AnyFatalError();
 }
diff --git a/flang/lib/Semantics/mod-file.h b/flang/lib/Semantics/mod-file.h
index 8823c5f1e4972..17ffe804c5be3 100644
--- a/flang/lib/Semantics/mod-file.h
+++ b/flang/lib/Semantics/mod-file.h
@@ -32,7 +32,7 @@ class SemanticsContext;
 
 class ModFileWriter {
 public:
-  ModFileWriter(SemanticsContext &context) : context_{context} {}
+  explicit ModFileWriter(SemanticsContext &context) : context_{context} {}
   bool WriteAll();
 
 private:
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 4d70f03dd5532..73d111ca3c093 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -2310,6 +2310,7 @@ void ModuleVisitor::Post(const parser::UseStmt &x) {
     }
     for (const auto &[name, symbol] : *useModuleScope_) {
       if (symbol->attrs().test(Attr::PUBLIC) &&
+          !symbol->attrs().test(Attr::INTRINSIC) &&
           !symbol->detailsIf()) {
         if (useNames.count(name) == 0) {
           auto *localSymbol{FindInScope(currScope(), name)};
@@ -4451,6 +4452,8 @@ std::optional DeclarationVisitor::CheckSaveAttr(
   } else if (symbol.has() &&
       !symbol.attrs().test(Attr::POINTER)) {
     return "Procedure '%s' with SAVE attribute must also have POINTER attribute"_err_en_US;
+  } else if (IsAutomatic(symbol)) {
+    return "SAVE attribute may not be applied to automatic data object '%s'"_err_en_US;
   } else {
     return std::nullopt;
   }
@@ -6010,7 +6013,8 @@ bool ResolveNamesVisitor::Pre(const parser::SpecificationPart &x) {
   Walk(std::get<1>(x.t));
   Walk(std::get<2>(x.t));
   Walk(std::get<3>(x.t));
-  const std::list &decls{std::get<4>(x.t)};
+  Walk(std::get<4>(x.t));
+  const std::list &decls{std::get<5>(x.t)};
   for (const auto &decl : decls) {
     if (const auto *spec{
             std::get_if(&decl.u)}) {
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 681e1dc5ca274..e949c92ff6ddd 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -8,8 +8,10 @@
 
 #include "flang/Semantics/semantics.h"
 #include "assignment.h"
+#include "canonicalize-acc.h"
 #include "canonicalize-do.h"
 #include "canonicalize-omp.h"
+#include "check-acc-structure.h"
 #include "check-allocate.h"
 #include "check-arithmeticif.h"
 #include "check-case.h"
@@ -154,12 +156,12 @@ class MiscChecker : public virtual BaseChecker {
 };
 
 using StatementSemanticsPass1 = ExprChecker;
-using StatementSemanticsPass2 = SemanticsVisitor;
+using StatementSemanticsPass2 = SemanticsVisitor;
 
 static bool PerformStatementSemantics(
     SemanticsContext &context, parser::Program &program) {
@@ -325,6 +327,7 @@ SymbolVector SemanticsContext::GetIndexVars(IndexVarKind kind) {
 bool Semantics::Perform() {
   return ValidateLabels(context_, program_) &&
       parser::CanonicalizeDo(program_) && // force line break
+      CanonicalizeAcc(context_.messages(), program_) &&
       CanonicalizeOmp(context_.messages(), program_) &&
       PerformStatementSemantics(context_, program_) &&
       ModFileWriter{context_}.WriteAll();
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index f710c1107ec16..dd708c7bfb913 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -34,7 +34,7 @@ static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
   common::UnsignedInt128 value{0};
   for (; next; next = io.NextInField(remaining)) {
     char32_t ch{*next};
-    if (ch == ' ') {
+    if (ch == ' ' || ch == '\t') {
       continue;
     }
     int digit{0};
@@ -101,7 +101,7 @@ bool EditIntegerInput(
   common::UnsignedInt128 value;
   for (; next; next = io.NextInField(remaining)) {
     char32_t ch{*next};
-    if (ch == ' ') {
+    if (ch == ' ' || ch == '\t') {
       if (edit.modes.editingFlags & blankZero) {
         ch = '0'; // BZ mode - treat blank as if it were zero
       } else {
@@ -170,7 +170,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
   } else if (*next == decimal || (*next >= '0' && *next <= '9')) {
     for (; next; next = io.NextInField(remaining)) {
       char32_t ch{*next};
-      if (ch == ' ') {
+      if (ch == ' ' || ch == '\t') {
         if (edit.modes.editingFlags & blankZero) {
           ch = '0'; // BZ mode - treat blank as if it were zero
         } else {
@@ -229,7 +229,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
     return 0;
   }
   if (remaining) {
-    while (next && *next == ' ') {
+    while (next && (*next == ' ' || *next == '\t')) {
       next = io.NextInField(remaining);
     }
     if (next) {
@@ -337,6 +337,9 @@ bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
   }
   if (remaining) { // ignore the rest of the field
     io.HandleRelativePosition(*remaining);
+  } else if (edit.descriptor == DataEdit::ListDirected) {
+    while (io.NextInField(remaining)) { // discard rest of field
+    }
   }
   return true;
 }
@@ -383,6 +386,7 @@ static bool EditListDirectedDefaultCharacterInput(
        next = io.NextInField(remaining)) {
     switch (*next) {
     case ' ':
+    case '\t':
     case ',':
     case ';':
     case '/':
diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index 941c5ccf3d593..4680c81129ed2 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -424,7 +424,8 @@ bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) {
   switch (edit.descriptor) {
   case 'L':
   case 'G':
-    return io.Emit(truth ? "T" : "F", 1);
+    return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) &&
+        io.Emit(truth ? "T" : "F", 1);
   default:
     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
         "Data edit descriptor '%c' may not be used with a LOGICAL data item",
diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp
index 19c86a9d4b82f..341702df995b8 100644
--- a/flang/runtime/file.cpp
+++ b/flang/runtime/file.cpp
@@ -57,63 +57,86 @@ static int openfile_mkstemp(IoErrorHandler &handler) {
   return fd;
 }
 
-void OpenFile::Open(
-    OpenStatus status, Position position, IoErrorHandler &handler) {
-  int flags{mayRead_ ? mayWrite_ ? O_RDWR : O_RDONLY : O_WRONLY};
-  switch (status) {
-  case OpenStatus::Old:
-    if (fd_ >= 0) {
-      return;
+void OpenFile::Open(OpenStatus status, std::optional action,
+    Position position, IoErrorHandler &handler) {
+  if (fd_ >= 0 &&
+      (status == OpenStatus::Old || status == OpenStatus::Unknown)) {
+    return;
+  }
+  if (fd_ >= 0) {
+    if (fd_ <= 2) {
+      // don't actually close a standard file descriptor, we might need it
+    } else {
+      if (::close(fd_) != 0) {
+        handler.SignalErrno();
+      }
     }
-    knownSize_.reset();
-    break;
-  case OpenStatus::New:
-    flags |= O_CREAT | O_EXCL;
-    knownSize_ = 0;
-    break;
-  case OpenStatus::Scratch:
+    fd_ = -1;
+  }
+  if (status == OpenStatus::Scratch) {
     if (path_.get()) {
       handler.SignalError("FILE= must not appear with STATUS='SCRATCH'");
       path_.reset();
     }
+    if (!action) {
+      action = Action::ReadWrite;
+    }
     fd_ = openfile_mkstemp(handler);
-    knownSize_ = 0;
-    return;
-  case OpenStatus::Replace:
-    flags |= O_CREAT | O_TRUNC;
-    knownSize_ = 0;
-    break;
-  case OpenStatus::Unknown:
-    if (fd_ >= 0) {
+  } else {
+    if (!path_.get()) {
+      handler.SignalError(
+          "FILE= is required unless STATUS='OLD' and unit is connected");
       return;
     }
-    flags |= O_CREAT;
-    knownSize_.reset();
-    break;
-  }
-  // If we reach this point, we're opening a new file.
-  // TODO: Fortran shouldn't create a new file until the first WRITE.
-  if (fd_ >= 0) {
-    if (fd_ <= 2) {
-      // don't actually close a standard file descriptor, we might need it
-    } else if (::close(fd_) != 0) {
-      handler.SignalErrno();
+    int flags{0};
+    if (status != OpenStatus::Old) {
+      flags |= O_CREAT;
+    }
+    if (status == OpenStatus::New) {
+      flags |= O_EXCL;
+    } else if (status == OpenStatus::Replace) {
+      flags |= O_TRUNC;
+    }
+    if (!action) {
+      // Try to open read/write, back off to read-only on failure
+      fd_ = ::open(path_.get(), flags | O_RDWR, 0600);
+      if (fd_ >= 0) {
+        action = Action::ReadWrite;
+      } else {
+        action = Action::Read;
+      }
+    }
+    if (fd_ < 0) {
+      switch (*action) {
+      case Action::Read:
+        flags |= O_RDONLY;
+        break;
+      case Action::Write:
+        flags |= O_WRONLY;
+        break;
+      case Action::ReadWrite:
+        flags |= O_RDWR;
+        break;
+      }
+      fd_ = ::open(path_.get(), flags, 0600);
+      if (fd_ < 0) {
+        handler.SignalErrno();
+      }
     }
   }
-  if (!path_.get()) {
-    handler.SignalError(
-        "FILE= is required unless STATUS='OLD' and unit is connected");
-    return;
-  }
-  fd_ = ::open(path_.get(), flags, 0600);
-  if (fd_ < 0) {
-    handler.SignalErrno();
-  }
+  RUNTIME_CHECK(handler, action.has_value());
   pending_.reset();
   if (position == Position::Append && !RawSeekToEnd()) {
     handler.SignalErrno();
   }
   isTerminal_ = ::isatty(fd_) == 1;
+  mayRead_ = *action != Action::Write;
+  mayWrite_ = *action != Action::Read;
+  if (status == OpenStatus::Old || status == OpenStatus::Unknown) {
+    knownSize_.reset();
+  } else {
+    knownSize_ = 0;
+  }
 }
 
 void OpenFile::Predefine(int fd) {
@@ -124,6 +147,9 @@ void OpenFile::Predefine(int fd) {
   knownSize_.reset();
   nextId_ = 0;
   pending_.reset();
+  mayRead_ = fd == 0;
+  mayWrite_ = fd != 0;
+  mayPosition_ = false;
 }
 
 void OpenFile::Close(CloseStatus status, IoErrorHandler &handler) {
diff --git a/flang/runtime/file.h b/flang/runtime/file.h
index 17a5e910ecae8..1d25a91558a4c 100644
--- a/flang/runtime/file.h
+++ b/flang/runtime/file.h
@@ -21,6 +21,7 @@ namespace Fortran::runtime::io {
 enum class OpenStatus { Old, New, Scratch, Replace, Unknown };
 enum class CloseStatus { Keep, Delete };
 enum class Position { AsIs, Rewind, Append };
+enum class Action { Read, Write, ReadWrite };
 
 class OpenFile {
 public:
@@ -30,19 +31,16 @@ class OpenFile {
   void set_path(OwningPtr &&, std::size_t bytes);
   std::size_t pathLength() const { return pathLength_; }
   bool mayRead() const { return mayRead_; }
-  void set_mayRead(bool yes) { mayRead_ = yes; }
   bool mayWrite() const { return mayWrite_; }
-  void set_mayWrite(bool yes) { mayWrite_ = yes; }
+  bool mayPosition() const { return mayPosition_; }
   bool mayAsynchronous() const { return mayAsynchronous_; }
   void set_mayAsynchronous(bool yes) { mayAsynchronous_ = yes; }
-  bool mayPosition() const { return mayPosition_; }
-  void set_mayPosition(bool yes) { mayPosition_ = yes; }
   FileOffset position() const { return position_; }
   bool isTerminal() const { return isTerminal_; }
   std::optional knownSize() const { return knownSize_; }
 
   bool IsOpen() const { return fd_ >= 0; }
-  void Open(OpenStatus, Position, IoErrorHandler &);
+  void Open(OpenStatus, std::optional, Position, IoErrorHandler &);
   void Predefine(int fd);
   void Close(CloseStatus, IoErrorHandler &);
 
diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 4710a590ccab9..2f077e1f9ff8f 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -73,6 +73,32 @@ Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &descriptor,
       formatLength, scratchArea, scratchBytes, sourceFile, sourceLine);
 }
 
+template 
+Cookie BeginInternalListIO(
+    std::conditional_t *internal,
+    std::size_t internalLength, void ** /*scratchArea*/,
+    std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) {
+  Terminator oom{sourceFile, sourceLine};
+  return &New>{oom}(
+      internal, internalLength, sourceFile, sourceLine)
+              .release()
+              ->ioStatementState();
+}
+
+Cookie IONAME(BeginInternalListOutput)(char *internal,
+    std::size_t internalLength, void **scratchArea, std::size_t scratchBytes,
+    const char *sourceFile, int sourceLine) {
+  return BeginInternalListIO(internal, internalLength,
+      scratchArea, scratchBytes, sourceFile, sourceLine);
+}
+
+Cookie IONAME(BeginInternalListInput)(const char *internal,
+    std::size_t internalLength, void **scratchArea, std::size_t scratchBytes,
+    const char *sourceFile, int sourceLine) {
+  return BeginInternalListIO(internal, internalLength,
+      scratchArea, scratchBytes, sourceFile, sourceLine);
+}
+
 template 
 Cookie BeginInternalFormattedIO(
     std::conditional_t *internal,
@@ -90,7 +116,6 @@ Cookie IONAME(BeginInternalFormattedOutput)(char *internal,
     std::size_t internalLength, const char *format, std::size_t formatLength,
     void **scratchArea, std::size_t scratchBytes, const char *sourceFile,
     int sourceLine) {
-  Terminator oom{sourceFile, sourceLine};
   return BeginInternalFormattedIO(internal, internalLength,
       format, formatLength, scratchArea, scratchBytes, sourceFile, sourceLine);
 }
@@ -99,7 +124,6 @@ Cookie IONAME(BeginInternalFormattedInput)(const char *internal,
     std::size_t internalLength, const char *format, std::size_t formatLength,
     void **scratchArea, std::size_t scratchBytes, const char *sourceFile,
     int sourceLine) {
-  Terminator oom{sourceFile, sourceLine};
   return BeginInternalFormattedIO<Direction::Input>(internal, internalLength,
       format, formatLength, scratchArea, scratchBytes, sourceFile, sourceLine);
 }
@@ -111,8 +135,8 @@ Cookie BeginExternalListIO(
   if (unitNumber == DefaultUnit) {
     unitNumber = DIR == Direction::Input ? 5 : 6;
   }
-  ExternalFileUnit &unit{
-      ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)};
+  ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous(
+      unitNumber, DIR, false /*formatted*/, terminator)};
   if (unit.access == Access::Direct) {
     terminator.Crash("List-directed I/O attempted on direct access file");
     return nullptr;
@@ -150,8 +174,8 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength,
   if (unitNumber == DefaultUnit) {
     unitNumber = DIR == Direction::Input ? 5 : 6;
   }
-  ExternalFileUnit &unit{
-      ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)};
+  ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous(
+      unitNumber, DIR, false /*formatted*/, terminator)};
   if (unit.isUnformatted) {
     terminator.Crash("Formatted I/O attempted on unformatted file");
     return nullptr;
@@ -185,8 +209,8 @@ template <Direction DIR>
 Cookie BeginUnformattedIO(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
-  ExternalFileUnit &unit{
-      ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)};
+  ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous(
+      unitNumber, DIR, true /*unformatted*/, terminator)};
   if (!unit.isUnformatted) {
     terminator.Crash("Unformatted output attempted on formatted file");
   }
@@ -223,7 +247,7 @@ Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=)
   bool wasExtant{false};
   Terminator terminator{sourceFile, sourceLine};
   ExternalFileUnit &unit{
-      ExternalFileUnit::LookUpOrCreate(unitNumber, terminator, &wasExtant)};
+      ExternalFileUnit::LookUpOrCreate(unitNumber, terminator, wasExtant)};
   return &unit.BeginIoStatement<OpenStatementState>(
       unit, wasExtant, sourceFile, sourceLine);
 }
@@ -231,10 +255,11 @@ Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=)
 Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j)
     const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
+  bool ignored{false};
   ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreate(
-      ExternalFileUnit::NewUnit(terminator), terminator)};
+      ExternalFileUnit::NewUnit(terminator), terminator, ignored)};
   return &unit.BeginIoStatement<OpenStatementState>(
-      unit, false /*wasExtant*/, sourceFile, sourceLine);
+      unit, false /*was an existing file*/, sourceFile, sourceLine);
 }
 
 Cookie IONAME(BeginClose)(
@@ -538,31 +563,31 @@ bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) {
     io.GetIoErrorHandler().Crash(
         "SetAction() called when not in an OPEN statement");
   }
-  bool mayRead{true};
-  bool mayWrite{true};
+  std::optional<Action> action;
   static const char *keywords[]{"READ", "WRITE", "READWRITE", nullptr};
   switch (IdentifyValue(keyword, length, keywords)) {
   case 0:
-    mayWrite = false;
+    action = Action::Read;
     break;
   case 1:
-    mayRead = false;
+    action = Action::Write;
     break;
   case 2:
+    action = Action::ReadWrite;
     break;
   default:
     open->SignalError(IostatErrorInKeyword, "Invalid ACTION='%.*s'",
         static_cast<int>(length), keyword);
     return false;
   }
-  if (mayRead != open->unit().mayRead() ||
-      mayWrite != open->unit().mayWrite()) {
-    if (open->wasExtant()) {
+  RUNTIME_CHECK(io.GetIoErrorHandler(), action.has_value());
+  if (open->wasExtant()) {
+    if ((*action != Action::Write) != open->unit().mayRead() ||
+        (*action != Action::Read) != open->unit().mayWrite()) {
       open->SignalError("ACTION= may not be changed on an open unit");
     }
-    open->unit().set_mayRead(mayRead);
-    open->unit().set_mayWrite(mayWrite);
   }
+  open->set_action(*action);
   return true;
 }
 
diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp
index a4d8af4f7211d..9e89e0c28816d 100644
--- a/flang/runtime/io-stmt.cpp
+++ b/flang/runtime/io-stmt.cpp
@@ -162,10 +162,12 @@ void OpenStatementState::set_path(
 }
 
 int OpenStatementState::EndIoStatement() {
-  if (wasExtant_ && status_ != OpenStatus::Old) {
-    SignalError("OPEN statement for connected unit must have STATUS='OLD'");
+  if (wasExtant_ && status_ && *status_ != OpenStatus::Old) {
+    SignalError("OPEN statement for connected unit may not have STATUS= other "
+                "than 'OLD'");
   }
-  unit().OpenUnit(status_, position_, std::move(path_), pathLength_, *this);
+  unit().OpenUnit(status_.value_or(OpenStatus::Unknown), action_, position_,
+      std::move(path_), pathLength_, *this);
   return ExternalIoStatementBase::EndIoStatement();
 }
 
@@ -352,7 +354,7 @@ std::optional<char32_t> IoStatementState::SkipSpaces(
     std::optional<int> &remaining) {
   while (!remaining || *remaining > 0) {
     if (auto ch{GetCurrentChar()}) {
-      if (*ch != ' ') {
+      if (*ch != ' ' && *ch != '\t') {
         return ch;
       }
       HandleRelativePosition(1);
@@ -372,6 +374,7 @@ std::optional<char32_t> IoStatementState::NextInField(
     if (auto next{GetCurrentChar()}) {
       switch (*next) {
       case ' ':
+      case '\t':
       case ',':
       case ';':
       case '/':
@@ -414,7 +417,7 @@ std::optional IoStatementState::NextInField(
 
 std::optional<char32_t> IoStatementState::GetNextNonBlank() {
   auto ch{GetCurrentChar()};
-  while (ch.value_or(' ') == ' ') {
+  while (!ch || *ch == ' ' || *ch == '\t') {
     if (ch) {
       HandleRelativePosition(1);
     } else if (!AdvanceRecord()) {
@@ -472,6 +475,10 @@ ListDirectedStatementState::GetNextDataEdit(
     edit.descriptor = DataEdit::ListDirectedNullValue;
     return edit;
   }
+  char32_t comma{','};
+  if (io.mutableModes().editingFlags & decimalComma) {
+    comma = ';';
+  }
   if (remaining_ > 0 && !realPart_) { // "r*c" repetition in progress
     while (connection.currentRecordNumber > initialRecordNumber_) {
       io.BackspaceRecord();
@@ -479,6 +486,11 @@ ListDirectedStatementState::GetNextDataEdit(
     connection.HandleAbsolutePosition(initialPositionInRecord_);
     if (!imaginaryPart_) {
       edit.repeat = std::min(remaining_, maxRepeat);
+      auto ch{io.GetNextNonBlank()};
+      if (!ch || *ch == ' ' || *ch == '\t' || *ch == comma) {
+        // "r*" repeated null
+        edit.descriptor = DataEdit::ListDirectedNullValue;
+      }
     }
     remaining_ -= edit.repeat;
     return edit;
@@ -503,10 +515,6 @@ ListDirectedStatementState::GetNextDataEdit(
     edit.descriptor = DataEdit::ListDirectedNullValue;
     return edit;
   }
-  char32_t comma{','};
-  if (io.mutableModes().editingFlags & decimalComma) {
-    comma = ';';
-  }
   bool isFirstItem{isFirstItem_};
   isFirstItem_ = false;
   if (*ch == comma) {
@@ -544,10 +552,14 @@ ListDirectedStatementState::GetNextDataEdit(
     if (r > 0 && ch && *ch == '*') { // subtle: r must be nonzero
       io.HandleRelativePosition(1);
       ch = io.GetCurrentChar();
-      if (!ch || *ch == ' ' || *ch == comma || *ch == '/') { // "r*" null
+      if (ch && *ch == '/') { // r*/
+        hitSlash_ = true;
         edit.descriptor = DataEdit::ListDirectedNullValue;
         return edit;
       }
+      if (!ch || *ch == ' ' || *ch == '\t' || *ch == comma) { // "r*" null
+        edit.descriptor = DataEdit::ListDirectedNullValue;
+      }
       edit.repeat = std::min(r, maxRepeat);
       remaining_ = r - edit.repeat;
       initialRecordNumber_ = connection.currentRecordNumber;
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index 066391bd15664..da58769ef1143 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -294,15 +294,17 @@ class OpenStatementState : public ExternalIoStatementBase {
       : ExternalIoStatementBase{unit, sourceFile, sourceLine}, wasExtant_{
                                                                    wasExtant} {}
   bool wasExtant() const { return wasExtant_; }
-  void set_status(OpenStatus status) { status_ = status; }
+  void set_status(OpenStatus status) { status_ = status; } // STATUS=
   void set_path(const char *, std::size_t, int kind); // FILE=
   void set_position(Position position) { position_ = position; } // POSITION=
+  void set_action(Action action) { action_ = action; } // ACTION=
   int EndIoStatement();
 
 private:
   bool wasExtant_;
-  OpenStatus status_{OpenStatus::Unknown};
+  std::optional<OpenStatus> status_;
   Position position_{Position::AsIs};
+  std::optional<Action> action_;
   OwningPtr<char> path_;
   std::size_t pathLength_;
 };
diff --git a/flang/runtime/stop.cpp b/flang/runtime/stop.cpp
index 49592b75a2ade..3b8c1385293ad 100644
--- a/flang/runtime/stop.cpp
+++ b/flang/runtime/stop.cpp
@@ -40,6 +40,7 @@ static void DescribeIEEESignaledExceptions() {
     if (excepts & FE_UNDERFLOW) {
       std::fputs(" UNDERFLOW", stderr);
     }
+    std::fputc('\n', stderr);
   }
 }
 
@@ -52,10 +53,11 @@ static void CloseAllExternalUnits(const char *why) {
     int code, bool isErrorStop, bool quiet) {
   CloseAllExternalUnits("STOP statement");
   if (!quiet) {
+    std::fprintf(stderr, "Fortran %s", isErrorStop ? "ERROR STOP" : "STOP");
     if (code != EXIT_SUCCESS) {
-      std::fprintf(stderr, "Fortran %s: code %d\n",
-          isErrorStop ? "ERROR STOP" : "STOP", code);
+      std::fprintf(stderr, ": code %d\n", code);
     }
+    std::fputc('\n', stderr);
     DescribeIEEESignaledExceptions();
   }
   std::exit(code);
diff --git a/flang/runtime/unit-map.h b/flang/runtime/unit-map.h
index 9efb2698d2233..be244f5ae463a 100644
--- a/flang/runtime/unit-map.h
+++ b/flang/runtime/unit-map.h
@@ -27,16 +27,11 @@ class UnitMap {
   }
 
   ExternalFileUnit &LookUpOrCreate(
-      int n, const Terminator &terminator, bool *wasExtant) {
+      int n, const Terminator &terminator, bool &wasExtant) {
     CriticalSection critical{lock_};
     auto *p{Find(n)};
-    if (wasExtant) {
-      *wasExtant = p != nullptr;
-    }
-    if (p) {
-      return *p;
-    }
-    return Create(n, terminator);
+    wasExtant = p != nullptr;
+    return p ? *p : Create(n, terminator);
   }
 
   ExternalFileUnit &NewUnit(const Terminator &terminator) {
diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp
index 2193ee0f6aca7..c6af53e6ec223 100644
--- a/flang/runtime/unit.cpp
+++ b/flang/runtime/unit.cpp
@@ -10,6 +10,7 @@
 #include "io-error.h"
 #include "lock.h"
 #include "unit-map.h"
+#include <cstdio>
 
 namespace Fortran::runtime::io {
 
@@ -46,10 +47,39 @@ ExternalFileUnit &ExternalFileUnit::LookUpOrCrash(
 }
 
 ExternalFileUnit &ExternalFileUnit::LookUpOrCreate(
-    int unit, const Terminator &terminator, bool *wasExtant) {
+    int unit, const Terminator &terminator, bool &wasExtant) {
   return GetUnitMap().LookUpOrCreate(unit, terminator, wasExtant);
 }
 
+ExternalFileUnit &ExternalFileUnit::LookUpOrCreateAnonymous(
+    int unit, Direction dir, bool isUnformatted, const Terminator &terminator) {
+  bool exists{false};
+  ExternalFileUnit &result{
+      GetUnitMap().LookUpOrCreate(unit, terminator, exists)};
+  if (!exists) {
+    // I/O to an unconnected unit reads/creates a local file, e.g. fort.7
+    std::size_t pathMaxLen{32};
+    auto path{SizedNew<char>{terminator}(pathMaxLen)};
+    std::snprintf(path.get(), pathMaxLen, "fort.%d", unit);
+    IoErrorHandler handler{terminator};
+    result.OpenUnit(
+        dir == Direction::Input ? OpenStatus::Old : OpenStatus::Replace,
+        Action::ReadWrite, Position::Rewind, std::move(path),
+        std::strlen(path.get()), handler);
+    result.isUnformatted = isUnformatted;
+  }
+  return result;
+}
+
+ExternalFileUnit &ExternalFileUnit::CreateNew(
+    int unit, const Terminator &terminator) {
+  bool wasExtant{false};
+  ExternalFileUnit &result{
+      GetUnitMap().LookUpOrCreate(unit, terminator, wasExtant)};
+  RUNTIME_CHECK(terminator, !wasExtant);
+  return result;
+}
+
 ExternalFileUnit *ExternalFileUnit::LookUpForClose(int unit) {
   return GetUnitMap().LookUpForClose(unit);
 }
@@ -58,8 +88,8 @@ int ExternalFileUnit::NewUnit(const Terminator &terminator) {
   return GetUnitMap().NewUnit(terminator).unitNumber();
 }
 
-void ExternalFileUnit::OpenUnit(OpenStatus status, Position position,
-    OwningPtr<char> &&newPath, std::size_t newPathLength,
+void ExternalFileUnit::OpenUnit(OpenStatus status, std::optional<Action> action,
+    Position position, OwningPtr<char> &&newPath, std::size_t newPathLength,
     IoErrorHandler &handler) {
   if (IsOpen()) {
     if (status == OpenStatus::Old &&
@@ -76,7 +106,7 @@ void ExternalFileUnit::OpenUnit(OpenStatus status, Position position,
     Close(CloseStatus::Keep, handler);
   }
   set_path(std::move(newPath), newPathLength);
-  Open(status, position, handler);
+  Open(status, action, position, handler);
   auto totalBytes{knownSize()};
   if (access == Access::Direct) {
     if (!isFixedRecordLength || !recordLength) {
@@ -155,18 +185,12 @@ UnitMap &ExternalFileUnit::GetUnitMap() {
   Terminator terminator{__FILE__, __LINE__};
   IoErrorHandler handler{terminator};
   unitMap = New<UnitMap>{terminator}().release();
-  ExternalFileUnit &out{ExternalFileUnit::LookUpOrCreate(6, terminator)};
+  ExternalFileUnit &out{ExternalFileUnit::CreateNew(6, terminator)};
   out.Predefine(1);
-  out.set_mayRead(false);
-  out.set_mayWrite(true);
-  out.set_mayPosition(false);
   out.SetDirection(Direction::Output, handler);
   defaultOutput = &out;
-  ExternalFileUnit &in{ExternalFileUnit::LookUpOrCreate(5, terminator)};
+  ExternalFileUnit &in{ExternalFileUnit::CreateNew(5, terminator)};
   in.Predefine(0);
-  in.set_mayRead(true);
-  in.set_mayWrite(false);
-  in.set_mayPosition(false);
   in.SetDirection(Direction::Input, handler);
   defaultInput = &in;
   // TODO: Set UTF-8 mode from the environment
diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h
index c54625413b875..d2d2dce035f14 100644
--- a/flang/runtime/unit.h
+++ b/flang/runtime/unit.h
@@ -39,14 +39,17 @@ class ExternalFileUnit : public ConnectionState,
   static ExternalFileUnit *LookUp(int unit);
   static ExternalFileUnit &LookUpOrCrash(int unit, const Terminator &);
   static ExternalFileUnit &LookUpOrCreate(
-      int unit, const Terminator &, bool *wasExtant = nullptr);
+      int unit, const Terminator &, bool &wasExtant);
+  static ExternalFileUnit &LookUpOrCreateAnonymous(
+      int unit, Direction, bool isUnformatted, const Terminator &);
+  static ExternalFileUnit &CreateNew(int unit, const Terminator &);
   static ExternalFileUnit *LookUpForClose(int unit);
   static int NewUnit(const Terminator &);
   static void CloseAll(IoErrorHandler &);
   static void FlushAll(IoErrorHandler &);
 
-  void OpenUnit(OpenStatus, Position, OwningPtr<char> &&path,
-      std::size_t pathLength, IoErrorHandler &);
+  void OpenUnit(OpenStatus, std::optional<Action>, Position,
+      OwningPtr<char> &&path, std::size_t pathLength, IoErrorHandler &);
   void CloseUnit(CloseStatus, IoErrorHandler &);
   void DestroyClosed();
 
diff --git a/flang/test/Parser/pp-dir-comments.f90 b/flang/test/Parser/pp-dir-comments.f90
new file mode 100644
index 0000000000000..f5fe4ca5c71e8
--- /dev/null
+++ b/flang/test/Parser/pp-dir-comments.f90
@@ -0,0 +1,19 @@
+! RUN: %f18 -funparse %s 2>&1 | FileCheck %s
+
+#define pmk
+#ifdef pmk // comment
+! CHECK: t1
+real t1
+#endif // comment
+#undef pmk ! comment
+#ifndef pmk ! comment
+! CHECK: t2
+real t2
+#endif // comment
+#if 0 /* C comment */ + 0
+! CHECK-NOT: misinterpreted
+# error misinterpreted #if
+#else // comment
+! CHECK: END PROGRAM
+end
+#endif ! comment
diff --git a/flang/test/Semantics/acc-branch.f90 b/flang/test/Semantics/acc-branch.f90
new file mode 100644
index 0000000000000..b1c2a6b860e44
--- /dev/null
+++ b/flang/test/Semantics/acc-branch.f90
@@ -0,0 +1,101 @@
+! RUN: %S/test_errors.sh %s %t %f18 -fopenacc
+
+! Check OpenACC restruction in branch in and out of some construct
+!
+
+program openacc_clause_validity
+
+  implicit none
+
+  integer :: i
+  integer :: N = 256
+  real(8) :: a(256)
+
+  !$acc parallel
+  !$acc loop
+  do i = 1, N
+    a(i) = 3.14
+    !ERROR: RETURN statement is not allowed in a PARALLEL construct
+    return
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !$acc loop
+  do i = 1, N
+    a(i) = 3.14
+    if(i == N-1) THEN
+      !ERROR: EXIT statement is not allowed in a PARALLEL construct
+      exit
+    end if
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !$acc loop
+  do i = 1, N
+    a(i) = 3.14
+    if(i == N-1) THEN
+      !ERROR: STOP statement is not allowed in a PARALLEL construct
+      stop 999
+    end if
+  end do
+  !$acc end parallel
+
+  !$acc kernels
+  do i = 1, N
+    a(i) = 3.14
+    !ERROR: RETURN statement is not allowed in a KERNELS construct
+    return
+  end do
+  !$acc end kernels
+
+  !$acc kernels
+  do i = 1, N
+    a(i) = 3.14
+    if(i == N-1) THEN
+      !ERROR: EXIT statement is not allowed in a KERNELS construct
+      exit
+    end if
+  end do
+  !$acc end kernels
+
+  !$acc kernels
+  do i = 1, N
+    a(i) = 3.14
+    if(i == N-1) THEN
+      !ERROR: STOP statement is not allowed in a KERNELS construct
+      stop 999
+    end if
+  end do
+  !$acc end kernels
+
+  !$acc serial
+  do i = 1, N
+    a(i) = 3.14
+    !ERROR: RETURN statement is not allowed in a SERIAL construct
+    return
+  end do
+  !$acc end serial
+
+  !$acc serial
+  do i = 1, N
+    a(i) = 3.14
+    if(i == N-1) THEN
+      !ERROR: EXIT statement is not allowed in a SERIAL construct
+      exit
+    end if
+  end do
+  !$acc end serial
+
+  !$acc serial
+  do i = 1, N
+    a(i) = 3.14
+    if(i == N-1) THEN
+      !ERROR: STOP statement is not allowed in a SERIAL construct
+      stop 999
+    end if
+  end do
+  !$acc end serial
+
+end program openacc_clause_validity
diff --git a/flang/test/Semantics/acc-clause-validity.f90 b/flang/test/Semantics/acc-clause-validity.f90
new file mode 100644
index 0000000000000..a8aefad384b12
--- /dev/null
+++ b/flang/test/Semantics/acc-clause-validity.f90
@@ -0,0 +1,179 @@
+! RUN: %S/test_errors.sh %s %t %f18 -fopenacc
+
+! Check OpenACC clause validity for the following construct and directive:
+!   2.6.5 Data
+!   2.5.1 Parallel
+!   2.5.2 Kernels
+!   2.5.3 Serial
+!   2.15.1 Routine
+!   2.11 Parallel Loop
+!   2.11 Kernels Loop
+!   2.11 Serial Loop
+
+program openacc_clause_validity
+
+  implicit none
+
+  integer :: i, j
+  integer :: N = 256
+  !ERROR: At least one clause is required on the DECLARE directive
+  !$acc declare
+  real(8) :: a(256)
+
+  !ERROR: At least one of ATTACH, COPYIN, CREATE clause must appear on the ENTER DATA directive
+  !$acc enter data
+
+  !ERROR: Only the READONLY modifier is allowed for the COPYIN clause on the ENTER DATA directive
+  !$acc enter data copyin(zero: i)
+
+  !ERROR: Only the ZERO modifier is allowed for the CREATE clause on the ENTER DATA directive
+  !$acc enter data create(readonly: i)
+
+  !ERROR: Only the ZERO modifier is allowed for the COPYOUT clause on the DATA directive
+  !$acc data copyout(readonly: i)
+  !$acc end data
+
+  !ERROR: COPYOUT clause is not allowed on the ENTER DATA directive
+  !$acc enter data copyin(i) copyout(i)
+
+  !ERROR: At most one IF clause can appear on the DATA directive
+  !$acc data copy(i) if(.true.) if(.true.)
+  !$acc end data
+
+  !ERROR: At least one of COPYOUT, DELETE, DETACH clause must appear on the EXIT DATA directive
+  !$acc exit data
+
+  !ERROR: At least one of USE_DEVICE clause must appear on the HOST_DATA directive
+  !$acc host_data
+  !$acc end host_data
+
+  !ERROR: At least one of DEFAULT_ASYNC, DEVICE_NUM, DEVICE_TYPE clause must appear on the SET directive
+  !$acc set
+
+  !ERROR: At least one of ATTACH, COPY, COPYIN, COPYOUT, CREATE, DEFAULT, DEVICEPTR, NO_CREATE, PRESENT clause must appear on the DATA directive
+  !$acc data
+  !$acc end data
+
+  !$acc data copyin(i)
+  !$acc end data
+
+  !$acc data copyin(i)
+  !ERROR: Unmatched PARALLEL directive
+  !$acc end parallel
+
+  !$acc update device(i) device_type(*) async
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the UPDATE directive
+  !$acc update device(i) device_type(*) if(.TRUE.)
+
+  !$acc parallel
+  !ERROR: INDEPENDENT and SEQ clauses are mutually exclusive and may not appear on the same LOOP directive
+  !$acc loop seq independent
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end parallel
+
+  !$acc parallel device_type(*) num_gangs(2)
+  !$acc loop
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !ERROR: The parameter of the COLLAPSE clause on the LOOP directive must be a constant positive integer expression
+  !$acc loop collapse(-1)
+  do i = 1, N
+    do j = 1, N
+      a(i) = 3.14 + j
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !ERROR: Clause PRIVATE is not allowed after clause DEVICE_TYPE on the LOOP directive
+  !$acc loop device_type(*) private(i)
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !ERROR: Clause GANG is not allowed if clause SEQ appears on the LOOP directive
+  !$acc loop gang seq
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end parallel
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the PARALLEL directive
+  !$acc parallel device_type(*) if(.TRUE.)
+  !$acc loop
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end parallel
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the PARALLEL LOOP directive
+  !$acc parallel loop device_type(*) if(.TRUE.)
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end parallel loop
+
+  !$acc kernels device_type(*) async
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end kernels
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the KERNELS directive
+  !$acc kernels device_type(*) if(.TRUE.)
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end kernels
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the KERNELS LOOP directive
+  !$acc kernels loop device_type(*) if(.TRUE.)
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end kernels loop
+
+  !$acc serial device_type(*) async
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end serial
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the SERIAL directive
+  !$acc serial device_type(*) if(.TRUE.)
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end serial
+
+  !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the SERIAL LOOP directive
+  !$acc serial loop device_type(*) if(.TRUE.)
+  do i = 1, N
+    a(i) = 3.14
+  end do
+  !$acc end serial loop
+
+ contains
+
+   subroutine sub1(a)
+     real :: a(:)
+     !ERROR: At least one of GANG, SEQ, VECTOR, WORKER clause must appear on the ROUTINE directive
+     !$acc routine
+   end subroutine sub1
+
+   subroutine sub2(a)
+     real :: a(:)
+     !ERROR: Clause NOHOST is not allowed after clause DEVICE_TYPE on the ROUTINE directive
+     !$acc routine seq device_type(*) nohost
+   end subroutine sub2
+
+end program openacc_clause_validity
diff --git a/flang/test/Semantics/modfile03.f90 b/flang/test/Semantics/modfile03.f90
index 1c68d0d72d920..9bdb35695f036 100644
--- a/flang/test/Semantics/modfile03.f90
+++ b/flang/test/Semantics/modfile03.f90
@@ -68,7 +68,6 @@ pure integer function f1(i)
 
 module m5b
   use m5a, only: k2 => k1, l2 => l1, f2 => f1
-  character(l2, k2) :: x
   interface
     subroutine s(x, y)
       import f2, l2
@@ -82,7 +81,6 @@ subroutine s(x, y)
 ! use m5a,only:k2=>k1
 ! use m5a,only:l2=>l1
 ! use m5a,only:f2=>f1
-! character(l2,4)::x
 ! interface
 !  subroutine s(x,y)
 !   import::f2
diff --git a/flang/test/Semantics/modfile30.f90 b/flang/test/Semantics/modfile30.f90
index 01c60d5a39900..dba950c2737aa 100644
--- a/flang/test/Semantics/modfile30.f90
+++ b/flang/test/Semantics/modfile30.f90
@@ -42,7 +42,6 @@ module m2
 ! type(t),parameter::a=t()
 !end
 
-! Don't write out intrinsics
 module m3a
   integer, parameter :: i4 = selected_int_kind(9)
 end
@@ -60,7 +59,6 @@ module m3b
 !Expect: m3b.mod
 !module m3b
 ! use m3a,only:i4
-! use m3a,only:selected_int_kind
 ! integer(4)::j
 !end
 
@@ -82,7 +80,6 @@ module m4b
 !Expect: m4b.mod
 !module m4b
 ! use m4a,only:a
-! use m4a,only:achar
 ! character(1_4,1),parameter::b="\001"
 !end
 
diff --git a/flang/test/Semantics/omp-clause-validity01.f90 b/flang/test/Semantics/omp-clause-validity01.f90
index e3f43dc5445e6..75050bdc06b5f 100644
--- a/flang/test/Semantics/omp-clause-validity01.f90
+++ b/flang/test/Semantics/omp-clause-validity01.f90
@@ -396,6 +396,9 @@
   !$omp taskyield
   !$omp barrier
   !$omp taskwait
+  !$omp taskwait depend(source)
+  !ERROR: Internal: no symbol found for 'i'
+  !$omp taskwait depend(sink:i-1)
   ! !$omp target enter data map(to:arrayA) map(alloc:arrayB)
   ! !$omp target update from(arrayA) to(arrayB)
   ! !$omp target exit data map(from:arrayA) map(delete:arrayB)
@@ -458,7 +461,6 @@
   enddo
   !$omp end taskloop simd
 
-  !ERROR: REDUCTION clause is not allowed on the TASKLOOP SIMD directive
   !$omp taskloop simd reduction(+:a)
   do i = 1, N
      a = a + 3.14
diff --git a/flang/test/Semantics/resolve14.f90 b/flang/test/Semantics/resolve14.f90
index 826e0da1c758a..44ece0b186440 100644
--- a/flang/test/Semantics/resolve14.f90
+++ b/flang/test/Semantics/resolve14.f90
@@ -3,20 +3,30 @@ module m1
   integer :: x
   integer :: y
   integer :: z
+  integer, parameter :: k1 = selected_int_kind(9)
 end
 module m2
   real :: y
   real :: z
   real :: w
+  integer, parameter :: k2 = selected_int_kind(9)
 end
 
-use m1, xx => x, y => z
-use m2
-volatile w
-!ERROR: Cannot change CONTIGUOUS attribute on use-associated 'w'
-contiguous w
-!ERROR: 'z' is use-associated from module 'm2' and cannot be re-declared
-integer z
-!ERROR: Reference to 'y' is ambiguous
-y = 1
+program p1
+  use m1
+  use m2
+  ! check that selected_int_kind is not use-associated
+  integer, parameter :: k = selected_int_kind(9)
+end
+
+program p2
+  use m1, xx => x, y => z
+  use m2
+  volatile w
+  !ERROR: Cannot change CONTIGUOUS attribute on use-associated 'w'
+  contiguous w
+  !ERROR: 'z' is use-associated from module 'm2' and cannot be re-declared
+  integer z
+  !ERROR: Reference to 'y' is ambiguous
+  y = 1
 end
diff --git a/flang/test/Semantics/resolve45.f90 b/flang/test/Semantics/resolve45.f90
index 3e98ff662a171..c2a96915836e7 100644
--- a/flang/test/Semantics/resolve45.f90
+++ b/flang/test/Semantics/resolve45.f90
@@ -68,3 +68,14 @@ subroutine s7
   !ERROR: 'x' appears as a COMMON block in a SAVE statement but not in a COMMON statement
   save /x/
 end
+
+subroutine s8a(n)
+  integer :: n
+  real :: x(n)  ! OK: save statement doesn't affect x
+  save
+end
+subroutine s8b(n)
+  integer :: n
+  !ERROR: SAVE attribute may not be applied to automatic data object 'x'
+  real, save :: x(n)
+end
diff --git a/flang/test/Semantics/resolve70.f90 b/flang/test/Semantics/resolve70.f90
index 564805a12408d..d86016e76f9d3 100644
--- a/flang/test/Semantics/resolve70.f90
+++ b/flang/test/Semantics/resolve70.f90
@@ -57,3 +57,19 @@ subroutine s1()
   !ERROR: Non-extensible derived type 'inextensible' may not be used with CLASS keyword
   class(inextensible), allocatable :: x
 end subroutine s1
+
+subroutine s2()
+  type t
+    integer i
+  end type t
+  type, extends(t) :: t2
+    real x
+  end type t2
+contains
+  function f1(dummy)
+    class(*) dummy
+    type(t) f1(1)
+    !ERROR: Cannot have an unlimited polymorphic value in an array constructor
+    f1 = [ (dummy) ]
+  end function f1
+end subroutine s2
diff --git a/flang/test/Semantics/resolve77.f90 b/flang/test/Semantics/resolve77.f90
index 8b85db96be3d7..e21de909cf3be 100644
--- a/flang/test/Semantics/resolve77.f90
+++ b/flang/test/Semantics/resolve77.f90
@@ -8,7 +8,7 @@ module m
   interface ifn3
     module procedure if3
   end interface
-  !ERROR: Specification expression must be constant in declaration of 'a' with the SAVE attribute
+  !ERROR: Automatic data object 'a' may not appear in the specification part of a module
   real :: a(if1(1))
   !ERROR: No specific procedure of generic 'ifn2' matches the actual arguments
   real :: b(ifn2(1))
diff --git a/flang/tools/f18-parse-demo/CMakeLists.txt b/flang/tools/f18-parse-demo/CMakeLists.txt
index 465873ca00ff6..a89e8ae8816cd 100644
--- a/flang/tools/f18-parse-demo/CMakeLists.txt
+++ b/flang/tools/f18-parse-demo/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  FrontendOpenACC
   FrontendOpenMP
   )
 
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 8738561fe45e7..46c38fa43a2e5 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  FrontendOpenACC
   FrontendOpenMP
   Support
   )
@@ -59,7 +60,7 @@ install(TARGETS f18 DESTINATION bin)
 
 set(FLANG_INTRINSIC_MODULES_DIR ${FLANG_BINARY_DIR}/include/flang)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${CMAKE_BINARY_DIR}/tools/flang/bin/flang @ONLY)
-file(COPY ${CMAKE_BINARY_DIR}/tools/flang/bin/flang DESTINATION ${CMAKE_BINARY_DIR}/bin FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) 
+file(COPY ${CMAKE_BINARY_DIR}/tools/flang/bin/flang DESTINATION ${CMAKE_BINARY_DIR}/bin FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE)
 # The flang script to be installed needs a different path to the headers.
 set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_INSTALL_PREFIX}/include/flang)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${FLANG_BINARY_DIR}/bin/flang-install.sh @ONLY)
diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp
index 26682eaa64897..574a37074e523 100644
--- a/flang/tools/f18/f18.cpp
+++ b/flang/tools/f18/f18.cpp
@@ -101,8 +101,8 @@ struct DriverOptions {
   bool debugNoSemantics{false};
   bool debugModuleWriter{false};
   bool measureTree{false};
-  bool unparseTypedExprsToPGF90{false};
-  std::vector<std::string> pgf90Args;
+  bool unparseTypedExprsToF18_FC{false};
+  std::vector<std::string> F18_FCArgs;
   const char *prefix{nullptr};
   bool getDefinition{false};
   GetDefinitionArgs getDefinitionArgs{0, 0, 0};
@@ -137,8 +137,8 @@ void Exec(std::vector<char *> &argv, bool verbose = false) {
 
 void RunOtherCompiler(DriverOptions &driver, char *source, char *relo) {
   std::vector<char *> argv;
-  for (size_t j{0}; j < driver.pgf90Args.size(); ++j) {
-    argv.push_back(driver.pgf90Args[j].data());
+  for (size_t j{0}; j < driver.F18_FCArgs.size(); ++j) {
+    argv.push_back(driver.F18_FCArgs[j].data());
   }
   char dashC[3] = "-c", dashO[3] = "-o";
   argv.push_back(dashC);
@@ -342,7 +342,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options,
         options.features.IsEnabled(
             Fortran::common::LanguageFeature::BackslashEscapes),
         nullptr /* action before each statement */,
-        driver.unparseTypedExprsToPGF90 ? &asFortran : nullptr);
+        driver.unparseTypedExprsToF18_FC ? &asFortran : nullptr);
   }
 
   if (ParentProcess()) {
@@ -371,8 +371,8 @@ std::string CompileOtherLanguage(std::string path, DriverOptions &driver) {
 void Link(std::vector &relocatables, DriverOptions &driver) {
   if (!ParentProcess()) {
     std::vector<char *> argv;
-    for (size_t j{0}; j < driver.pgf90Args.size(); ++j) {
-      argv.push_back(driver.pgf90Args[j].data());
+    for (size_t j{0}; j < driver.F18_FCArgs.size(); ++j) {
+      argv.push_back(driver.F18_FCArgs[j].data());
     }
     for (auto &relo : relocatables) {
       argv.push_back(relo.data());
@@ -391,9 +391,9 @@ int main(int argc, char *const argv[]) {
   atexit(CleanUpAtExit);
 
   DriverOptions driver;
-  const char *pgf90{getenv("F18_FC")};
-  driver.pgf90Args.push_back(pgf90 ? pgf90 : "pgf90");
-  bool isPGF90{driver.pgf90Args.back().rfind("pgf90") != std::string::npos};
+  const char *F18_FC{getenv("F18_FC")};
+  driver.F18_FCArgs.push_back(F18_FC ? F18_FC : "gfortran");
+  bool isPGF90{driver.F18_FCArgs.back().rfind("pgf90") != std::string::npos};
 
   std::list args{argList(argc, argv)};
   std::string prefix{args.front()};
@@ -423,7 +423,7 @@ int main(int argc, char *const argv[]) {
       anyFiles = true;
       auto dot{arg.rfind(".")};
       if (dot == std::string::npos) {
-        driver.pgf90Args.push_back(arg);
+        driver.F18_FCArgs.push_back(arg);
       } else {
         std::string suffix{arg.substr(dot + 1)};
         if (suffix == "f" || suffix == "F" || suffix == "ff" ||
@@ -446,15 +446,17 @@ int main(int argc, char *const argv[]) {
         args.pop_front();
       }
       break;
-    } else if (arg == "-Mfixed") {
+    } else if (arg == "-Mfixed" || arg == "-ffixed-form") {
       driver.forcedForm = true;
       options.isFixedForm = true;
-    } else if (arg == "-Mfree") {
+    } else if (arg == "-Mfree" || arg == "-ffree-form") {
       driver.forcedForm = true;
       options.isFixedForm = false;
-    } else if (arg == "-Mextend") {
+    } else if (arg == "-Mextend" || arg == "-ffixed-line-length-132") {
       options.fixedFormColumns = 132;
-    } else if (arg == "-Munlimited") {
+    } else if (arg == "-Munlimited" || arg == "-ffree-line-length-none" ||
+        arg == "-ffree-line-length-0" || arg == "-ffixed-line-length-none" ||
+        arg == "-ffixed-line-length-0") {
       // For reparsing f18's -E output of fixed-form cooked character stream
       options.fixedFormColumns = 1000000;
     } else if (arg == "-Mbackslash") {
@@ -463,8 +465,12 @@ int main(int argc, char *const argv[]) {
     } else if (arg == "-Mnobackslash") {
       options.features.Enable(
           Fortran::common::LanguageFeature::BackslashEscapes, true);
-    } else if (arg == "-Mstandard") {
+    } else if (arg == "-Mstandard" || arg == "-std=f95" ||
+        arg == "-std=f2003" || arg == "-std=f2008" || arg == "-std=legacy") {
       driver.warnOnNonstandardUsage = true;
+    } else if (arg == "-fopenacc") {
+      options.features.Enable(Fortran::common::LanguageFeature::OpenACC);
+      options.predefinitions.emplace_back("_OPENACC", "201911");
     } else if (arg == "-fopenmp") {
       options.features.Enable(Fortran::common::LanguageFeature::OpenMP);
       options.predefinitions.emplace_back("_OPENMP", "201511");
@@ -510,8 +516,8 @@ int main(int argc, char *const argv[]) {
       driver.dumpUnparse = true;
     } else if (arg == "-funparse-with-symbols") {
       driver.dumpUnparseWithSymbols = true;
-    } else if (arg == "-funparse-typed-exprs-to-pgf90") {
-      driver.unparseTypedExprsToPGF90 = true;
+    } else if (arg == "-funparse-typed-exprs-to-f18-fc") {
+      driver.unparseTypedExprsToF18_FC = true;
     } else if (arg == "-fparse-only") {
       driver.parseOnly = true;
     } else if (arg == "-c") {
@@ -530,6 +536,8 @@ int main(int argc, char *const argv[]) {
     } else if (arg.substr(0, 2) == "-U") {
       options.predefinitions.emplace_back(
           arg.substr(2), std::optional{});
+    } else if (arg == "-fdefault-double-8") {
+      defaultKinds.set_defaultRealKind(4);
     } else if (arg == "-r8" || arg == "-fdefault-real-8") {
       defaultKinds.set_defaultRealKind(8);
     } else if (arg == "-i8" || arg == "-fdefault-integer-8") {
@@ -580,15 +588,17 @@ int main(int argc, char *const argv[]) {
     } else if (arg == "-help" || arg == "--help" || arg == "-?") {
       llvm::errs()
           << "f18 options:\n"
-          << "  -Mfixed | -Mfree     force the source form\n"
-          << "  -Mextend             132-column fixed form\n"
+          << "  -Mfixed | -Mfree | -ffixed-form | -ffree-form   force the "
+             "source form\n"
+          << "  -Mextend | -ffixed-line-length-132   132-column fixed form\n"
           << "  -f[no-]backslash     enable[disable] \\escapes in literals\n"
           << "  -M[no]backslash      disable[enable] \\escapes in literals\n"
           << "  -Mstandard           enable conformance warnings\n"
+          << "  -std=      enable conformance warnings\n"
           << "  -fenable=   enable a language feature\n"
           << "  -fdisable=  disable a language feature\n"
-          << "  -r8 | -fdefault-real-8 | -i8 | -fdefault-integer-8  "
-             "change default kinds of intrinsic types\n"
+          << "  -r8 | -fdefault-real-8 | -i8 | -fdefault-integer-8 | "
+             "-fdefault-double-8   change default kinds of intrinsic types\n"
           << "  -Werror              treat warnings as errors\n"
           << "  -ed                  enable fixed form D lines\n"
           << "  -E                   prescan & preprocess only\n"
@@ -616,11 +626,11 @@ int main(int argc, char *const argv[]) {
       llvm::errs() << "\nf18 compiler (under development)\n";
       return exitStatus;
     } else {
-      driver.pgf90Args.push_back(arg);
+      driver.F18_FCArgs.push_back(arg);
       if (arg == "-v") {
         driver.verbose = true;
       } else if (arg == "-I") {
-        driver.pgf90Args.push_back(args.front());
+        driver.F18_FCArgs.push_back(args.front());
         driver.searchDirectories.push_back(args.front());
         args.pop_front();
       } else if (arg.substr(0, 2) == "-I") {
@@ -632,21 +642,26 @@ int main(int argc, char *const argv[]) {
   if (driver.warnOnNonstandardUsage) {
     options.features.WarnOnAllNonstandard();
   }
-  if (options.features.IsEnabled(Fortran::common::LanguageFeature::OpenMP)) {
-    driver.pgf90Args.push_back("-mp");
-  }
   if (isPGF90) {
     if (!options.features.IsEnabled(
             Fortran::common::LanguageFeature::BackslashEscapes)) {
-      driver.pgf90Args.push_back(
+      driver.F18_FCArgs.push_back(
           "-Mbackslash"); // yes, this *disables* them in pgf90
     }
+    if (options.features.IsEnabled(Fortran::common::LanguageFeature::OpenMP)) {
+      driver.F18_FCArgs.push_back("-mp");
+    }
+
     Fortran::parser::useHexadecimalEscapeSequences = false;
   } else {
     if (options.features.IsEnabled(
             Fortran::common::LanguageFeature::BackslashEscapes)) {
-      driver.pgf90Args.push_back("-fbackslash");
+      driver.F18_FCArgs.push_back("-fbackslash");
     }
+    if (options.features.IsEnabled(Fortran::common::LanguageFeature::OpenMP)) {
+      driver.F18_FCArgs.push_back("-fopenmp");
+    }
+
     Fortran::parser::useHexadecimalEscapeSequences = true;
   }
 
diff --git a/flang/unittests/CMakeLists.txt b/flang/unittests/CMakeLists.txt
index 440ab4e0358de..d53d155f2f2b5 100644
--- a/flang/unittests/CMakeLists.txt
+++ b/flang/unittests/CMakeLists.txt
@@ -9,3 +9,4 @@ add_subdirectory(Optimizer)
 add_subdirectory(Decimal)
 add_subdirectory(Evaluate)
 add_subdirectory(Runtime)
+add_subdirectory(Lower)
diff --git a/flang/unittests/Lower/CMakeLists.txt b/flang/unittests/Lower/CMakeLists.txt
new file mode 100644
index 0000000000000..19535e8f45348
--- /dev/null
+++ b/flang/unittests/Lower/CMakeLists.txt
@@ -0,0 +1,13 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+
+set(LIBS
+  MLIRLLVMIR
+  ${dialect_libs}
+)
+
+add_flang_unittest(FlangLoweringOpenMPTests
+	OpenMPLoweringTest.cpp
+)
+target_link_libraries(FlangLoweringOpenMPTests
+  PRIVATE
+  ${LIBS})
diff --git a/flang/unittests/Lower/OpenMPLoweringTest.cpp b/flang/unittests/Lower/OpenMPLoweringTest.cpp
new file mode 100644
index 0000000000000..185d07105a192
--- /dev/null
+++ b/flang/unittests/Lower/OpenMPLoweringTest.cpp
@@ -0,0 +1,59 @@
+//===- OpenMPLoweringTest.cpp -- OpenMPLowering unit tests ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/Builders.h"
+#include "flang/Parser/parse-tree.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+
+class OpenMPLoweringTest : public testing::Test {
+protected:
+  void SetUp() override {
+    mlir::registerDialect();
+    mlir::registerAllDialects(&ctx);
+    mlirOpBuilder.reset(new mlir::OpBuilder(&ctx));
+  }
+
+  void TearDown() override { mlirOpBuilder.reset(); }
+
+  mlir::MLIRContext ctx;
+  std::unique_ptr mlirOpBuilder;
+};
+
+TEST_F(OpenMPLoweringTest, Barrier) {
+  // Construct a dummy parse tree node for `!OMP barrier`.
+  struct Fortran::parser::OmpSimpleStandaloneDirective barrierDirective(
+      llvm::omp::Directive::OMPD_barrier);
+
+  // Check and lower the `!OMP barrier` node to `BarrierOp` operation of
+  // OpenMPDialect.
+  EXPECT_EQ(barrierDirective.v, llvm::omp::Directive::OMPD_barrier);
+  auto barrierOp = mlirOpBuilder->create(
+      mlirOpBuilder->getUnknownLoc());
+
+  EXPECT_EQ(barrierOp.getOperationName(), "omp.barrier");
+  EXPECT_EQ(succeeded(barrierOp.verify()), true);
+}
+
+TEST_F(OpenMPLoweringTest, TaskWait) {
+  // Construct a dummy parse tree node for `!OMP taskwait`.
+  struct Fortran::parser::OmpSimpleStandaloneDirective taskWaitDirective(
+      llvm::omp::Directive::OMPD_taskwait);
+
+  // Check and lower the `!OMP taskwait` node to `TaskwaitOp` operation of
+  // OpenMPDialect.
+  EXPECT_EQ(taskWaitDirective.v, llvm::omp::Directive::OMPD_taskwait);
+  auto taskWaitOp = mlirOpBuilder->create(
+      mlirOpBuilder->getUnknownLoc());
+
+  EXPECT_EQ(taskWaitOp.getOperationName(), "omp.taskwait");
+  EXPECT_EQ(succeeded(taskWaitOp.verify()), true);
+}
+
+// main() from gtest_main
diff --git a/flang/unittests/Runtime/list-input.cpp b/flang/unittests/Runtime/list-input.cpp
index c7a660dc87aae..9ec77080203a2 100644
--- a/flang/unittests/Runtime/list-input.cpp
+++ b/flang/unittests/Runtime/list-input.cpp
@@ -15,7 +15,7 @@ int main() {
 
   char buffer[4][32];
   int j{0};
-  for (const char *p : {"1 2 2*3  ,", ",6,,8,123*",
+  for (const char *p : {"1 2 2*3  ,", ",6,,8,1*",
            "2*'abcdefghijklmnopqrstuvwxyzABC", "DEFGHIJKLMNOPQRSTUVWXYZ'"}) {
     SetCharacter(buffer[j++], sizeof buffer[0], p);
   }
diff --git a/libc/AOR_v20.02/math/tools/plot.py b/libc/AOR_v20.02/math/tools/plot.py
index 611c99a9e69f2..8c7da5a8ffd72 100755
--- a/libc/AOR_v20.02/math/tools/plot.py
+++ b/libc/AOR_v20.02/math/tools/plot.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # ULP error plot tool.
 #
diff --git a/libc/benchmarks/render.py3 b/libc/benchmarks/render.py3
index e790d18f84e7f..f8c321ff17af6 100644
--- a/libc/benchmarks/render.py3
+++ b/libc/benchmarks/render.py3
@@ -112,7 +112,7 @@ def get_configuration(jsons):
     return config
 
 
-def setup_graphs(files):
+def setup_graphs(files, display):
     """Setups the graphs to render from the json files."""
     jsons = []
     for file in files:
@@ -122,6 +122,7 @@ def setup_graphs(files):
         sys.exit("Nothing to process")
 
     for root in jsons:
+        frequency = root["Host"]["CpuFrequency"]
         for function in root["Functions"]:
             function_name = function["Name"]
             sizes = function["Sizes"]
@@ -129,7 +130,13 @@ def setup_graphs(files):
             assert len(sizes) == len(runtimes)
             values = collections.defaultdict(lambda: [])
             for i in range(len(sizes)):
-              values[sizes[i]].append(runtimes[i])
+              value = runtimes[i]
+              if display == "cycles":
+                  value = value * frequency
+              if display == "bytespercycle":
+                  value = value * frequency
+                  value = sizes[i] / value
+              values[sizes[i]].append(value)
             add_plot(function_name, values)
 
     config = get_configuration(jsons)
@@ -148,9 +155,15 @@ def setup_graphs(files):
     axes.set_title(get_title(get_host(jsons)))
     axes.set_ylim(bottom=0)
     axes.set_xlabel("Size")
-    axes.set_ylabel("Time")
     axes.xaxis.set_major_formatter(EngFormatter(unit="B"))
-    axes.yaxis.set_major_formatter(EngFormatter(unit="s"))
+    if display == "cycles":
+          axes.set_ylabel("Cycles")
+    if display == "time":
+          axes.set_ylabel("Time")
+          axes.yaxis.set_major_formatter(EngFormatter(unit="s"))
+    if display == "bytespercycle":
+          axes.set_ylabel("bytes/cycle")
+
     plt.legend()
     plt.grid()
 
@@ -164,8 +177,14 @@ def main():
         "--headless",
         help="If set do not display the graph.",
         action="store_true")
+    parser.add_argument(
+        "--display",
+        choices= ["time", "cycles", "bytespercycle"],
+        default="time",
+        help="Use to display either 'time', 'cycles' or 'bytes/cycle'.")
+
     args = parser.parse_args()
-    setup_graphs(args.files)
+    setup_graphs(args.files, args.display)
     if args.output:
         plt.savefig(args.output)
     if not args.headless:
diff --git a/libc/utils/CPP/TypeTraits.h b/libc/utils/CPP/TypeTraits.h
index dfc16b00ab745..9b121c03f8b98 100644
--- a/libc/utils/CPP/TypeTraits.h
+++ b/libc/utils/CPP/TypeTraits.h
@@ -26,27 +26,12 @@ struct FalseValue {
   static constexpr bool Value = false;
 };
 
-template  struct IsIntegral : public FalseValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-template <> struct IsIntegral : public TrueValue {};
-
-template  struct IsPointerType : public FalseValue {};
-template  struct IsPointerType : public TrueValue {};
+template  struct TypeIdentity { typedef T Type; };
 
 template  struct IsSame : public FalseValue {};
 template  struct IsSame : public TrueValue {};
-
-template  struct TypeIdentity { typedef T Type; };
+template 
+static constexpr bool IsSameV = IsSame::Value;
 
 template  struct RemoveCV : public TypeIdentity {};
 template  struct RemoveCV : public TypeIdentity {};
@@ -56,10 +41,28 @@ struct RemoveCV : public TypeIdentity {};
 
 template  using RemoveCVType = typename RemoveCV::Type;
 
+template  struct IsIntegral {
+  using TypeNoCV = RemoveCVType;
+  static constexpr bool Value =
+      IsSameV || IsSameV ||
+      IsSameV || IsSameV ||
+      IsSameV || IsSameV ||
+      IsSameV || IsSameV ||
+      IsSameV || IsSameV ||
+      IsSameV || IsSameV;
+};
+
+template  struct IsPointerTypeNoCV : public FalseValue {};
+template  struct IsPointerTypeNoCV : public TrueValue {};
+template  struct IsPointerType {
+  static constexpr bool Value = IsPointerTypeNoCV>::Value;
+};
+
 template  struct IsFloatingPointType {
-  static constexpr bool Value = IsSame>::Value ||
-                                IsSame>::Value ||
-                                IsSame>::Value;
+  using TypeNoCV = RemoveCVType;
+  static constexpr bool Value = IsSame::Value ||
+                                IsSame::Value ||
+                                IsSame::Value;
 };
 
 } // namespace cpp
diff --git a/libc/utils/FPUtil/CMakeLists.txt b/libc/utils/FPUtil/CMakeLists.txt
index d9084f72e8410..682db93859a96 100644
--- a/libc/utils/FPUtil/CMakeLists.txt
+++ b/libc/utils/FPUtil/CMakeLists.txt
@@ -8,11 +8,14 @@ add_header_library(
   fputil
   HDRS
     ${LONG_DOUBLE_HDR}
+    BasicOperations.h
     BitPatterns.h
+    ClassificationFunctions.h
     FloatOperations.h
     FloatProperties.h
     FPBits.h
     ManipulationFunctions.h
+    NearestIntegerOperations.h
   DEPENDS
     libc.utils.CPP.standalone_cpp
 )
diff --git a/libc/utils/FPUtil/LongDoubleBitsX86.h b/libc/utils/FPUtil/LongDoubleBitsX86.h
index 3d7f455ff22c9..5438e0b2b6edb 100644
--- a/libc/utils/FPUtil/LongDoubleBitsX86.h
+++ b/libc/utils/FPUtil/LongDoubleBitsX86.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_UTILS_FPUTIL_LONG_DOUBLE_BITS_X86_H
 #define LLVM_LIBC_UTILS_FPUTIL_LONG_DOUBLE_BITS_X86_H
 
-#include "utils/FPUtil/FPBits.h"
+#include "FPBits.h"
 
 #include 
 
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 310ca5f56c461..caf655d6799aa 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -32,7 +32,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL
   project(libcxx CXX C)
 
   set(PACKAGE_NAME libcxx)
-  set(PACKAGE_VERSION 11.0.0git)
+  set(PACKAGE_VERSION 12.0.0git)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
@@ -407,14 +407,10 @@ endif ()
 # Configure System
 #===============================================================================
 
-set(LIBCXX_COMPILER    ${CMAKE_CXX_COMPILER})
 set(LIBCXX_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
 set(LIBCXX_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
 set(LIBCXX_BINARY_INCLUDE_DIR "${LIBCXX_BINARY_DIR}/include/c++build")
 
-string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION
-       ${PACKAGE_VERSION})
-
 if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
   set(LIBCXX_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
   set(LIBCXX_HEADER_DIR ${LLVM_BINARY_DIR})
@@ -429,6 +425,7 @@ elseif(LLVM_LIBRARY_OUTPUT_INTDIR)
   set(LIBCXX_INSTALL_LIBRARY_DIR lib${LIBCXX_LIBDIR_SUFFIX})
 else()
   set(LIBCXX_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LIBCXX_LIBDIR_SUFFIX})
+  set(LIBCXX_HEADER_DIR  ${CMAKE_BINARY_DIR})
   set(LIBCXX_INSTALL_LIBRARY_DIR lib${LIBCXX_LIBDIR_SUFFIX})
 endif()
 
@@ -638,7 +635,6 @@ endfunction()
 # RTTI flags ==================================================================
 function(cxx_add_rtti_flags target)
   if (NOT LIBCXX_ENABLE_RTTI)
-    target_compile_definitions(${target} PUBLIC -D_LIBCPP_NO_RTTI)
     target_add_compile_flags_if_supported(${target} PUBLIC -GR-)
     target_add_compile_flags_if_supported(${target} PUBLIC -fno-rtti)
   endif()
@@ -874,22 +870,11 @@ if (DEFINED WIN32 AND LIBCXX_ENABLE_STATIC AND NOT LIBCXX_ENABLE_SHARED)
   config_define(ON _LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
 endif()
 
-# We generate a __config_site header (see libcxx/include/CMakeLists.txt) and
-# we make sure to include it when building the library.
-function(cxx_add_config_site target)
-  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC")
-    target_compile_options(${target} PUBLIC /FI "${LIBCXX_BINARY_DIR}/__config_site")
-  else()
-    target_compile_options(${target} PUBLIC -include "${LIBCXX_BINARY_DIR}/__config_site")
-  endif()
-endfunction()
-
 # Setup all common build flags =================================================
 function(cxx_add_common_build_flags target)
   cxx_add_basic_build_flags(${target})
   cxx_add_warning_flags(${target})
   cxx_add_windows_flags(${target})
-  cxx_add_config_site(${target})
   cxx_add_exception_flags(${target})
   cxx_add_rtti_flags(${target})
   cxx_add_module_flags(${target})
@@ -899,7 +884,6 @@ endfunction()
 #===============================================================================
 # Setup Source Code And Tests
 #===============================================================================
-include_directories(include)
 add_subdirectory(include)
 add_subdirectory(src)
 
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index f012cccb696e9..8480ede23a49f 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -87,8 +87,6 @@ set(BENCHMARK_TEST_COMPILE_FLAGS
     -I${LIBCXX_SOURCE_DIR}/test/support
 )
 set(BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS
-    -nostdinc++
-    -isystem ${LIBCXX_SOURCE_DIR}/include
     ${BENCHMARK_TEST_COMPILE_FLAGS}
     ${SANITIZER_FLAGS}
     -Wno-user-defined-literals
@@ -130,7 +128,7 @@ function(add_benchmark_test name source_file)
   set(libcxx_target ${name}_libcxx)
   list(APPEND libcxx_benchmark_targets ${libcxx_target})
   add_executable(${libcxx_target} EXCLUDE_FROM_ALL ${source_file})
-  add_dependencies(${libcxx_target} cxx cxx-headers google-benchmark-libcxx)
+  add_dependencies(${libcxx_target} cxx google-benchmark-libcxx)
   add_dependencies(cxx-benchmarks ${libcxx_target})
   if (LIBCXX_ENABLE_SHARED)
     target_link_libraries(${libcxx_target} PRIVATE cxx_shared)
diff --git a/libcxx/cmake/Modules/DefineLinkerScript.cmake b/libcxx/cmake/Modules/DefineLinkerScript.cmake
index 2e68121f6187e..11a6ca57dfc0d 100644
--- a/libcxx/cmake/Modules/DefineLinkerScript.cmake
+++ b/libcxx/cmake/Modules/DefineLinkerScript.cmake
@@ -31,6 +31,9 @@ function(define_linker_script target)
   set(link_libraries)
   if (interface_libs)
     foreach(lib IN LISTS interface_libs)
+      if ("${lib}" STREQUAL "cxx-headers")
+        continue()
+      endif()
       if (TARGET "${lib}" OR
           (${lib} MATCHES "cxxabi(_static|_shared)?" AND HAVE_LIBCXXABI) OR
           (${lib} MATCHES "unwind(_static|_shared)?" AND HAVE_LIBUNWIND))
diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index 1db79153ed894..5157685cba01a 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -1,5 +1,5 @@
 =========================================
-Libc++ 11.0.0 (In-Progress) Release Notes
+Libc++ 12.0.0 (In-Progress) Release Notes
 =========================================
 
 .. contents::
@@ -10,7 +10,7 @@ Written by the `Libc++ Team `_
 
 .. warning::
 
-   These are in-progress notes for the upcoming libc++ 11 release.
+   These are in-progress notes for the upcoming libc++ 12 release.
    Release notes for previous releases can be found on
    `the Download Page `_.
 
@@ -18,7 +18,7 @@ Introduction
 ============
 
 This document contains the release notes for the libc++ C++ Standard Library,
-part of the LLVM Compiler Infrastructure, release 11.0.0. Here we describe the
+part of the LLVM Compiler Infrastructure, release 12.0.0. Here we describe the
 status of libc++ in some detail, including major improvements from the previous
 release and new feature work. For the general LLVM release notes, see `the LLVM
 documentation `_. All LLVM releases may
@@ -32,13 +32,13 @@ main Libc++ web page, this document applies to the *next* release, not
 the current one. To see the release notes for a specific release, please
 see the `releases page `_.
 
-What's New in Libc++ 11.0.0?
+What's New in Libc++ 12.0.0?
 ============================
 
 New Features
 ------------
 
-- ````
+- ...
 
 API Changes
 -----------
diff --git a/libcxx/docs/conf.py b/libcxx/docs/conf.py
index 00000eec027d3..797a4d80f512c 100644
--- a/libcxx/docs/conf.py
+++ b/libcxx/docs/conf.py
@@ -47,9 +47,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '11.0'
+version = '12.0'
 # The full version, including alpha/beta/rc tags.
-release = '11.0'
+release = '12.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index da6623f103b67..be8141c981667 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -201,7 +201,6 @@ add_custom_command(OUTPUT ${LIBCXX_BINARY_DIR}/__generated_config
 # Add a target that executes the generation commands.
 add_custom_target(cxx-generated-config ALL
   DEPENDS ${LIBCXX_BINARY_DIR}/__generated_config)
-set(generated_config_deps cxx-generated-config)
 
 # In some build configurations (like bootstrapping clang), we need to be able to
 # install the libcxx headers before the CMake configuration for libcxx runs. Making
@@ -229,16 +228,30 @@ if(LIBCXX_HEADER_DIR)
   set(src ${LIBCXX_BINARY_DIR}/__generated_config)
   set(dst ${output_dir}/__config)
   add_custom_command(OUTPUT ${dst}
-      DEPENDS ${src} ${generated_config_deps}
+      DEPENDS ${src} cxx-generated-config
       COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst}
       COMMENT "Copying CXX __config")
   list(APPEND out_files ${dst})
+  add_custom_target(generate-cxx-headers DEPENDS ${out_files})
 
-  add_custom_target(${CXX_HEADER_TARGET} ALL DEPENDS ${out_files} ${LIBCXX_CXX_ABI_HEADER_TARGET})
+  add_library(${CXX_HEADER_TARGET} INTERFACE)
+  add_dependencies(${CXX_HEADER_TARGET} generate-cxx-headers ${LIBCXX_CXX_ABI_HEADER_TARGET})
+  # TODO: Use target_include_directories once we figure out why that breaks the runtimes build
+  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC")
+    target_compile_options(${CXX_HEADER_TARGET} INTERFACE /I "${output_dir}")
+  else()
+    target_compile_options(${CXX_HEADER_TARGET} INTERFACE -I "${output_dir}")
+  endif()
+
+  # Make sure the generated __config_site header is included when we build the library.
+  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC")
+    target_compile_options(${CXX_HEADER_TARGET} INTERFACE /FI "${LIBCXX_BINARY_DIR}/__config_site")
+  else()
+    target_compile_options(${CXX_HEADER_TARGET} INTERFACE -include "${LIBCXX_BINARY_DIR}/__config_site")
+  endif()
 else()
-  add_custom_target(${CXX_HEADER_TARGET})
+  add_library(${CXX_HEADER_TARGET} INTERFACE)
 endif()
-set_target_properties(${CXX_HEADER_TARGET} PROPERTIES FOLDER "Misc")
 
 if (LIBCXX_INSTALL_HEADERS)
   foreach(file ${files})
@@ -259,7 +272,7 @@ if (LIBCXX_INSTALL_HEADERS)
 
   if (NOT CMAKE_CONFIGURATION_TYPES)
     add_custom_target(install-${CXX_HEADER_TARGET}
-                      DEPENDS ${CXX_HEADER_TARGET} ${generated_config_deps}
+                      DEPENDS ${CXX_HEADER_TARGET} cxx-generated-config
                       COMMAND "${CMAKE_COMMAND}"
                               -DCMAKE_INSTALL_COMPONENT=${CXX_HEADER_TARGET}
                               -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 7e4c37431ea44..3b019a0493b55 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -32,7 +32,7 @@
 #  define _GNUC_VER_NEW 0
 #endif
 
-#define _LIBCPP_VERSION 11000
+#define _LIBCPP_VERSION 12000
 
 #ifndef _LIBCPP_ABI_VERSION
 #  define _LIBCPP_ABI_VERSION 1
@@ -423,10 +423,6 @@ typedef __char32_t char32_t;
 #  define _LIBCPP_NO_EXCEPTIONS
 #endif
 
-#if !(__has_feature(cxx_rtti)) && !defined(_LIBCPP_NO_RTTI)
-#define _LIBCPP_NO_RTTI
-#endif
-
 #if !(__has_feature(cxx_strong_enums))
 #define _LIBCPP_HAS_NO_STRONG_ENUMS
 #endif
@@ -1109,13 +1105,12 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container(
 #endif
 
 // Try to find out if RTTI is disabled.
-// g++ and cl.exe have RTTI on by default and define a macro when it is.
-#if !defined(_LIBCPP_NO_RTTI)
-#  if defined(__GNUC__) && !defined(__GXX_RTTI)
-#    define _LIBCPP_NO_RTTI
-#  elif defined(_LIBCPP_COMPILER_MSVC) && !defined(_CPPRTTI)
-#    define _LIBCPP_NO_RTTI
-#  endif
+#if defined(_LIBCPP_COMPILER_CLANG) && !__has_feature(cxx_rtti)
+#  define _LIBCPP_NO_RTTI
+#elif defined(__GNUC__) && !defined(__GXX_RTTI)
+#  define _LIBCPP_NO_RTTI
+#elif defined(_LIBCPP_COMPILER_MSVC) && !defined(_CPPRTTI)
+#  define _LIBCPP_NO_RTTI
 #endif
 
 #ifndef _LIBCPP_WEAK
diff --git a/libcxx/include/__libcpp_version b/libcxx/include/__libcpp_version
index 82b3803a20e9f..e334181b40062 100644
--- a/libcxx/include/__libcpp_version
+++ b/libcxx/include/__libcpp_version
@@ -1 +1 @@
-11000
+12000
diff --git a/libcxx/include/ios b/libcxx/include/ios
index d6967edbccdb7..7f0e2d65e6406 100644
--- a/libcxx/include/ios
+++ b/libcxx/include/ios
@@ -843,7 +843,7 @@ basic_ios<_CharT, _Traits>::set_rdbuf(basic_streambuf* _
     ios_base::set_rdbuf(__sb);
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 boolalpha(ios_base& __str)
 {
@@ -851,7 +851,7 @@ boolalpha(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 noboolalpha(ios_base& __str)
 {
@@ -859,7 +859,7 @@ noboolalpha(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 showbase(ios_base& __str)
 {
@@ -867,7 +867,7 @@ showbase(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 noshowbase(ios_base& __str)
 {
@@ -875,7 +875,7 @@ noshowbase(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 showpoint(ios_base& __str)
 {
@@ -883,7 +883,7 @@ showpoint(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 noshowpoint(ios_base& __str)
 {
@@ -891,7 +891,7 @@ noshowpoint(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 showpos(ios_base& __str)
 {
@@ -899,7 +899,7 @@ showpos(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 noshowpos(ios_base& __str)
 {
@@ -907,7 +907,7 @@ noshowpos(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 skipws(ios_base& __str)
 {
@@ -915,7 +915,7 @@ skipws(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 noskipws(ios_base& __str)
 {
@@ -923,7 +923,7 @@ noskipws(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 uppercase(ios_base& __str)
 {
@@ -931,7 +931,7 @@ uppercase(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 nouppercase(ios_base& __str)
 {
@@ -939,7 +939,7 @@ nouppercase(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 unitbuf(ios_base& __str)
 {
@@ -947,7 +947,7 @@ unitbuf(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 nounitbuf(ios_base& __str)
 {
@@ -955,7 +955,7 @@ nounitbuf(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 internal(ios_base& __str)
 {
@@ -963,7 +963,7 @@ internal(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 left(ios_base& __str)
 {
@@ -971,7 +971,7 @@ left(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 right(ios_base& __str)
 {
@@ -979,7 +979,7 @@ right(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 dec(ios_base& __str)
 {
@@ -987,7 +987,7 @@ dec(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 hex(ios_base& __str)
 {
@@ -995,7 +995,7 @@ hex(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 oct(ios_base& __str)
 {
@@ -1003,7 +1003,7 @@ oct(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 fixed(ios_base& __str)
 {
@@ -1011,7 +1011,7 @@ fixed(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 scientific(ios_base& __str)
 {
@@ -1019,7 +1019,7 @@ scientific(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 hexfloat(ios_base& __str)
 {
@@ -1027,7 +1027,7 @@ hexfloat(ios_base& __str)
     return __str;
 }
 
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 ios_base&
 defaultfloat(ios_base& __str)
 {
diff --git a/libcxx/include/ostream b/libcxx/include/ostream
index ea3870532f329..697732d54e6d8 100644
--- a/libcxx/include/ostream
+++ b/libcxx/include/ostream
@@ -999,7 +999,7 @@ basic_ostream<_CharT, _Traits>::seekp(off_type __off, ios_base::seekdir __dir)
 }
 
 template <class _CharT, class _Traits>
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 basic_ostream<_CharT, _Traits>&
 endl(basic_ostream<_CharT, _Traits>& __os)
 {
@@ -1009,7 +1009,7 @@ endl(basic_ostream<_CharT, _Traits>& __os)
 }
 
 template <class _CharT, class _Traits>
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 basic_ostream<_CharT, _Traits>&
 ends(basic_ostream<_CharT, _Traits>& __os)
 {
@@ -1018,7 +1018,7 @@ ends(basic_ostream<_CharT, _Traits>& __os)
 }
 
 template <class _CharT, class _Traits>
-inline _LIBCPP_INLINE_VISIBILITY
+inline
 basic_ostream<_CharT, _Traits>&
 flush(basic_ostream<_CharT, _Traits>& __os)
 {
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 9c2db48b66b7f..2001c09761d96 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -163,7 +163,8 @@ if (LIBCXX_ENABLE_SHARED)
   if(COMMAND llvm_setup_rpath)
     llvm_setup_rpath(cxx_shared)
   endif()
-  target_link_libraries(cxx_shared PRIVATE ${LIBCXX_LIBRARIES})
+  target_link_libraries(cxx_shared PUBLIC cxx-headers
+                                   PRIVATE ${LIBCXX_LIBRARIES})
   set_target_properties(cxx_shared
     PROPERTIES
       COMPILE_FLAGS "${LIBCXX_COMPILE_FLAGS}"
@@ -244,7 +245,8 @@ endif()
 # Build the static library.
 if (LIBCXX_ENABLE_STATIC)
   add_library(cxx_static STATIC ${exclude_from_all} ${LIBCXX_SOURCES} ${LIBCXX_HEADERS})
-  target_link_libraries(cxx_static PRIVATE ${LIBCXX_LIBRARIES})
+  target_link_libraries(cxx_static PUBLIC cxx-headers
+                                   PRIVATE ${LIBCXX_LIBRARIES})
   set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
   set_target_properties(cxx_static
     PROPERTIES
@@ -298,7 +300,7 @@ if (LIBCXX_ENABLE_STATIC)
 endif()
 
 # Add a meta-target for both libraries.
-add_custom_target(cxx DEPENDS cxx-headers ${LIBCXX_BUILD_TARGETS})
+add_custom_target(cxx DEPENDS ${LIBCXX_BUILD_TARGETS})
 
 if (LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY)
   set(LIBCXX_EXPERIMENTAL_SOURCES
diff --git a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py
index ff4ac2147bf68..7086c69cdd116 100644
--- a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py
+++ b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py
@@ -244,21 +244,29 @@ def test_trivial(self):
         self.assertIn('name', self.config.available_features)
 
     def test_name_can_be_a_callable(self):
-        feature = dsl.Feature(name=lambda cfg: (self.assertIs(self.config, cfg), 'name')[1])
+        feature = dsl.Feature(name=lambda cfg: 'name')
         assert feature.isSupported(self.config)
+        self.assertEqual('name', feature.getName(self.config))
         feature.enableIn(self.config)
         self.assertIn('name', self.config.available_features)
 
     def test_name_is_not_a_string_1(self):
         feature = dsl.Feature(name=None)
         assert feature.isSupported(self.config)
+        self.assertRaises(ValueError, lambda: feature.getName(self.config))
         self.assertRaises(ValueError, lambda: feature.enableIn(self.config))
 
     def test_name_is_not_a_string_2(self):
         feature = dsl.Feature(name=lambda cfg: None)
         assert feature.isSupported(self.config)
+        self.assertRaises(ValueError, lambda: feature.getName(self.config))
         self.assertRaises(ValueError, lambda: feature.enableIn(self.config))
 
+    def test_getName_when_unsupported(self):
+        feature = dsl.Feature(name='name', when=lambda _: False)
+        assert not feature.isSupported(self.config)
+        self.assertRaises(AssertionError, lambda: feature.getName(self.config))
+
     def test_adding_compile_flag(self):
         feature = dsl.Feature(name='name', compileFlag='-foo')
         origLinkFlags = copy.deepcopy(self.getSubstitution('%{link_flags}'))
diff --git a/libcxx/test/lit.site.cfg.in b/libcxx/test/lit.site.cfg.in
index 939776f2287da..1f3370ccc9bc2 100644
--- a/libcxx/test/lit.site.cfg.in
+++ b/libcxx/test/lit.site.cfg.in
@@ -3,7 +3,7 @@
 import os
 import site
 
-config.cxx_under_test           = "@LIBCXX_COMPILER@"
+config.cxx_under_test           = "@CMAKE_CXX_COMPILER@"
 config.project_obj_root         = "@CMAKE_BINARY_DIR@"
 config.libcxx_src_root          = "@LIBCXX_SOURCE_DIR@"
 config.libcxx_obj_root          = "@LIBCXX_BINARY_DIR@"
diff --git a/libcxx/test/pretty_printers/gdb_pretty_printer_test.sh.cpp b/libcxx/test/pretty_printers/gdb_pretty_printer_test.sh.cpp
index 081e778540a0b..540db56478e4e 100644
--- a/libcxx/test/pretty_printers/gdb_pretty_printer_test.sh.cpp
+++ b/libcxx/test/pretty_printers/gdb_pretty_printer_test.sh.cpp
@@ -383,6 +383,10 @@ void set_test() {
   ComparePrettyPrintToChars(prime_pairs,
       "std::set with 2 elements = {"
       "{first = 3, second = 5}, {first = 5, second = 7}}");
+
+  using using_set = std::set<int>;
+  using_set other{1, 2, 3};
+  ComparePrettyPrintToChars(other, "std::set with 3 elements = {1, 2, 3}");
 }
 
 void stack_test() {
diff --git a/libcxx/test/std/namespace/addressable_functions.sh.cpp b/libcxx/test/std/namespace/addressable_functions.sh.cpp
new file mode 100644
index 0000000000000..fb731abf306ca
--- /dev/null
+++ b/libcxx/test/std/namespace/addressable_functions.sh.cpp
@@ -0,0 +1,185 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Make sure functions specified as being 'addressable' (their address can be
+// taken in a well-defined manner) are indeed addressable. This notion was
+// added by http://wg21.link/p0551. While it was technically only introduced
+// in C++20, we test it in all standard modes because it's basic QOI to provide
+// a consistent behavior for that across standard modes.
+
+// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu1.o -DTU1
+// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu2.o -DTU2
+// RUN: %{cxx} %{flags} %{link_flags} %t.tu1.o %t.tu2.o -o %t.exe
+// RUN: %{exec} %t.exe
+
+#include <cassert>
+#include <istream>
+#include <map>
+#include <ostream>
+#include <string>
+
+
+typedef std::ios_base& (FormatFlagFunction)(std::ios_base&);
+typedef std::basic_ostream<char>& (OstreamManipFunction)(std::basic_ostream<char>&);
+typedef std::basic_ostream<wchar_t>& (WOstreamManipFunction)(std::basic_ostream<wchar_t>&);
+typedef std::basic_istream<char>& (IstreamManipFunction)(std::basic_istream<char>&);
+typedef std::basic_istream<wchar_t>& (WIstreamManipFunction)(std::basic_istream<wchar_t>&);
+
+extern FormatFlagFunction* get_formatflag_tu1(std::string);
+extern FormatFlagFunction* get_formatflag_tu2(std::string);
+
+extern OstreamManipFunction* get_ostreammanip_tu1(std::string);
+extern OstreamManipFunction* get_ostreammanip_tu2(std::string);
+extern WOstreamManipFunction* get_wostreammanip_tu1(std::string);
+extern WOstreamManipFunction* get_wostreammanip_tu2(std::string);
+
+extern IstreamManipFunction* get_istreammanip_tu1(std::string);
+extern IstreamManipFunction* get_istreammanip_tu2(std::string);
+extern WIstreamManipFunction* get_wistreammanip_tu1(std::string);
+extern WIstreamManipFunction* get_wistreammanip_tu2(std::string);
+
+#ifdef TU1
+FormatFlagFunction* get_formatflag_tu1(std::string func)
+#else
+FormatFlagFunction* get_formatflag_tu2(std::string func)
+#endif
+{
+    std::map<std::string, FormatFlagFunction*> all_funcs;
+
+    // [fmtflags.manip]
+    all_funcs.insert(std::make_pair("boolalpha", &std::boolalpha));
+    all_funcs.insert(std::make_pair("noboolalpha", &std::noboolalpha));
+    all_funcs.insert(std::make_pair("showbase", &std::showbase));
+    all_funcs.insert(std::make_pair("noshowbase", &std::noshowbase));
+    all_funcs.insert(std::make_pair("showpoint", &std::showpoint));
+    all_funcs.insert(std::make_pair("noshowpoint", &std::noshowpoint));
+    all_funcs.insert(std::make_pair("showpos", &std::showpos));
+    all_funcs.insert(std::make_pair("noshowpos", &std::noshowpos));
+    all_funcs.insert(std::make_pair("skipws", &std::skipws));
+    all_funcs.insert(std::make_pair("noskipws", &std::noskipws));
+    all_funcs.insert(std::make_pair("uppercase", &std::uppercase));
+    all_funcs.insert(std::make_pair("nouppercase", &std::nouppercase));
+    all_funcs.insert(std::make_pair("unitbuf", &std::unitbuf));
+    all_funcs.insert(std::make_pair("nounitbuf", &std::nounitbuf));
+
+    // [adjustfield.manip]
+    all_funcs.insert(std::make_pair("internal", &std::internal));
+    all_funcs.insert(std::make_pair("left", &std::left));
+    all_funcs.insert(std::make_pair("right", &std::right));
+
+    // [basefield.manip]
+    all_funcs.insert(std::make_pair("dec", &std::dec));
+    all_funcs.insert(std::make_pair("hex", &std::hex));
+    all_funcs.insert(std::make_pair("oct", &std::oct));
+
+    // [floatfield.manip]
+    all_funcs.insert(std::make_pair("fixed", &std::fixed));
+    all_funcs.insert(std::make_pair("scientific", &std::scientific));
+    all_funcs.insert(std::make_pair("hexfloat", &std::hexfloat));
+    all_funcs.insert(std::make_pair("defaultfloat", &std::defaultfloat));
+
+    return all_funcs.at(func);
+}
+
+// [ostream.manip] (char)
+#ifdef TU1
+OstreamManipFunction* get_ostreammanip_tu1(std::string func)
+#else
+OstreamManipFunction* get_ostreammanip_tu2(std::string func)
+#endif
+{
+    std::map<std::string, OstreamManipFunction*> all_funcs;
+    typedef std::char_traits<char> Traits;
+    all_funcs.insert(std::make_pair("endl", &std::endl<char, Traits>));
+    all_funcs.insert(std::make_pair("ends", &std::ends<char, Traits>));
+    all_funcs.insert(std::make_pair("flush", &std::flush<char, Traits>));
+    return all_funcs.at(func);
+}
+
+// [ostream.manip] (wchar_t)
+#ifdef TU1
+WOstreamManipFunction* get_wostreammanip_tu1(std::string func)
+#else
+WOstreamManipFunction* get_wostreammanip_tu2(std::string func)
+#endif
+{
+    std::map<std::string, WOstreamManipFunction*> all_funcs;
+    typedef std::char_traits<wchar_t> Traits;
+    all_funcs.insert(std::make_pair("endl", &std::endl<wchar_t, Traits>));
+    all_funcs.insert(std::make_pair("ends", &std::ends<wchar_t, Traits>));
+    all_funcs.insert(std::make_pair("flush", &std::flush<wchar_t, Traits>));
+    return all_funcs.at(func);
+}
+
+// [istream.manip] (char)
+#ifdef TU1
+IstreamManipFunction* get_istreammanip_tu1(std::string func)
+#else
+IstreamManipFunction* get_istreammanip_tu2(std::string func)
+#endif
+{
+    std::map<std::string, IstreamManipFunction*> all_funcs;
+    typedef std::char_traits<char> Traits;
+    all_funcs.insert(std::make_pair("ws", &std::ws<char, Traits>));
+    return all_funcs.at(func);
+}
+
+// [istream.manip] (wchar_t)
+#ifdef TU1
+WIstreamManipFunction* get_wistreammanip_tu1(std::string func)
+#else
+WIstreamManipFunction* get_wistreammanip_tu2(std::string func)
+#endif
+{
+    std::map<std::string, WIstreamManipFunction*> all_funcs;
+    typedef std::char_traits<wchar_t> Traits;
+    all_funcs.insert(std::make_pair("ws", &std::ws<wchar_t, Traits>));
+    return all_funcs.at(func);
+}
+
+
+#ifdef TU2
+    int main() {
+        assert(get_formatflag_tu1("boolalpha") == get_formatflag_tu2("boolalpha"));
+        assert(get_formatflag_tu1("noboolalpha") == get_formatflag_tu2("noboolalpha"));
+        assert(get_formatflag_tu1("showbase") == get_formatflag_tu2("showbase"));
+        assert(get_formatflag_tu1("noshowbase") == get_formatflag_tu2("noshowbase"));
+        assert(get_formatflag_tu1("showpoint") == get_formatflag_tu2("showpoint"));
+        assert(get_formatflag_tu1("noshowpoint") == get_formatflag_tu2("noshowpoint"));
+        assert(get_formatflag_tu1("showpos") == get_formatflag_tu2("showpos"));
+        assert(get_formatflag_tu1("noshowpos") == get_formatflag_tu2("noshowpos"));
+        assert(get_formatflag_tu1("skipws") == get_formatflag_tu2("skipws"));
+        assert(get_formatflag_tu1("noskipws") == get_formatflag_tu2("noskipws"));
+        assert(get_formatflag_tu1("uppercase") == get_formatflag_tu2("uppercase"));
+        assert(get_formatflag_tu1("nouppercase") == get_formatflag_tu2("nouppercase"));
+        assert(get_formatflag_tu1("unitbuf") == get_formatflag_tu2("unitbuf"));
+        assert(get_formatflag_tu1("nounitbuf") == get_formatflag_tu2("nounitbuf"));
+        assert(get_formatflag_tu1("internal") == get_formatflag_tu2("internal"));
+        assert(get_formatflag_tu1("left") == get_formatflag_tu2("left"));
+        assert(get_formatflag_tu1("right") == get_formatflag_tu2("right"));
+        assert(get_formatflag_tu1("dec") == get_formatflag_tu2("dec"));
+        assert(get_formatflag_tu1("hex") == get_formatflag_tu2("hex"));
+        assert(get_formatflag_tu1("oct") == get_formatflag_tu2("oct"));
+        assert(get_formatflag_tu1("fixed") == get_formatflag_tu2("fixed"));
+        assert(get_formatflag_tu1("scientific") == get_formatflag_tu2("scientific"));
+        assert(get_formatflag_tu1("hexfloat") == get_formatflag_tu2("hexfloat"));
+        assert(get_formatflag_tu1("defaultfloat") == get_formatflag_tu2("defaultfloat"));
+
+        assert(get_ostreammanip_tu1("endl") == get_ostreammanip_tu2("endl"));
+        assert(get_ostreammanip_tu1("ends") == get_ostreammanip_tu2("ends"));
+        assert(get_ostreammanip_tu1("flush") == get_ostreammanip_tu2("flush"));
+
+        assert(get_wostreammanip_tu1("endl") == get_wostreammanip_tu2("endl"));
+        assert(get_wostreammanip_tu1("ends") == get_wostreammanip_tu2("ends"));
+        assert(get_wostreammanip_tu1("flush") == get_wostreammanip_tu2("flush"));
+
+        assert(get_istreammanip_tu1("ws") == get_istreammanip_tu2("ws"));
+
+        assert(get_wistreammanip_tu1("ws") == get_wistreammanip_tu2("ws"));
+    }
+#endif
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp
index 78fc96ba4a0de..4ab6ff69417ca 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp
index b3ac748edf6a1..22a265f3abab0 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp
index 501d11dd937c6..6b04074820317 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp
index 3741611c36931..cfffafdce822f 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // ALLOW_RETRIES: 2
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp
index 831873320b2ed..aada7797c14d7 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp
index eb19009e99b9c..1e3f4e012f26e 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp
index 45cdf646340a9..31643071f1fd6 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp
index 9a3e222e30a06..f0081c2c2dd85 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp
index a7004151f7bb6..d13eeb860fc34 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // ALLOW_RETRIES: 2
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp
index d13884e934058..6db3a44c71f51 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // ALLOW_RETRIES: 2
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp
index 39cca84ab22e0..50916833b169d 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp
index 94da2a46257f6..fe8d038d6d1a9 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp
index d7989df91c52b..8ed1fcfb81455 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: c++03, c++11
-// XFAIL: dylib-has-no-shared_mutex
+
+// dylib support for shared_mutex was added in macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // <shared_mutex>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp
index f75ddafbb42ee..e59ae952e1de2 100644
--- a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp
index 9b1e7dd42b003..f1367c8402fa8 100644
--- a/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp
index 1c4eceb1353b3..7ccaef832d1d0 100644
--- a/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp
index 88aa8ac07e72f..88789e146c60f 100644
--- a/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp
index ff9e5f7e7b7ec..5d3ff7b46e87a 100644
--- a/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp
index a04282c3f374a..7e21a1a9ec584 100644
--- a/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp
index c5a1afd5ebbeb..47f609225ff83 100644
--- a/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp
index d5c1416a86bdb..bb6b3345d9457 100644
--- a/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp
index fa94283143360..f2d5f0640202e 100644
--- a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp
index 7c1ec1f323081..8cce5bfd6254c 100644
--- a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // <any>
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp
index 838c1185cdcfe..175e9f6732041 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp
index 495a928725135..be245998cdbaf 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp
index e14ecae5bab35..018e7b817009e 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp
@@ -7,7 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// UNSUPPORTED: dylib-has-no-bad_any_cast
+
+// Throwing bad_any_cast is supported starting in macosx10.13
+// UNSUPPORTED: with_system_cxx_lib=macosx10.12
+// UNSUPPORTED: with_system_cxx_lib=macosx10.11
+// UNSUPPORTED: with_system_cxx_lib=macosx10.10
+// UNSUPPORTED: with_system_cxx_lib=macosx10.9
 
 // 
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp
index 79bbcfd118f2e..8e43be514ade8 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp
@@ -7,7 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// UNSUPPORTED: dylib-has-no-bad_any_cast
+
+// Throwing bad_any_cast is supported starting in macosx10.13
+// UNSUPPORTED: with_system_cxx_lib=macosx10.12
+// UNSUPPORTED: with_system_cxx_lib=macosx10.11
+// UNSUPPORTED: with_system_cxx_lib=macosx10.10
+// UNSUPPORTED: with_system_cxx_lib=macosx10.9
 
 // 
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp
index 2f29278035590..4c314fd69d226 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp
@@ -7,7 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// UNSUPPORTED: dylib-has-no-bad_any_cast
+
+// Throwing bad_any_cast is supported starting in macosx10.13
+// UNSUPPORTED: with_system_cxx_lib=macosx10.12
+// UNSUPPORTED: with_system_cxx_lib=macosx10.11
+// UNSUPPORTED: with_system_cxx_lib=macosx10.10
+// UNSUPPORTED: with_system_cxx_lib=macosx10.9
 
 // 
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp
index 6974d7e746b06..9bebbcd6ee0fa 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp
index fbbca818c1a55..b4ac37edcdd54 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_any_cast && !no-exceptions
+// Throwing bad_any_cast is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp
index 7f35b5a34fb8d..7c8c9c3966478 100644
--- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp
index adb13a29c20dc..f0ea77371ab92 100644
--- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp
@@ -7,7 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// XFAIL: dylib-has-no-bad_optional_access
+
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp
index c2b74d4eebd23..f48a304e5add7 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp
index 2838627297dde..af6cd5721762e 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
index c19f6cc9c719a..fcbd26797acba 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp
index ebb70defb232e..fc19d52e0de2b 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp
@@ -8,7 +8,11 @@
 //
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp
index 4fcdd96b3e114..f2cabd34e5760 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp
index cff41d3775029..48996026d0eea 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp
index 958d506fe561c..4a64c8ad8c43f 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp
@@ -8,7 +8,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp
index ef907cc02ac37..655dbd33664b6 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp
@@ -9,7 +9,11 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // 
 
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // constexpr T& optional::value() &&;
 
diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp
index d1a0945712471..49678bb68b3d5 100644
--- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp
@@ -7,7 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// XFAIL: dylib-has-no-bad_optional_access && !no-exceptions
+
+// Throwing bad_optional_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 //
diff --git a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp
index 1e06373c7a39e..61c2cae221049 100644
--- a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp
@@ -9,8 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access
-
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12
+// XFAIL: with_system_cxx_lib=macosx10.11
+// XFAIL: with_system_cxx_lib=macosx10.10
+// XFAIL: with_system_cxx_lib=macosx10.9
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp
index 1575bc4ad62d1..2570dde92b5b1 100644
--- a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp
index 06a3767f56135..2b8e67a116169 100644
--- a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp
index 8659fa1272ad3..f3af26539539e 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp
index ae0cf0da54cee..3b46850a42664 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp
index dcc317cf51901..6d11b50acf0d7 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp
@@ -9,8 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
-
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
index 383ebd11a986d..1ac42019f7de2 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
@@ -8,7 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp
index 9f3c47ed54ed1..d70b305714efa 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp
index 6857f62c151ff..04b8074a80561 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp
index 9f1322a104e81..2273a0e1c5976 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp
index b285f97dc2ac9..495a72bbeeb00 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp
@@ -8,7 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp
index 88b10bf74f44b..ac1a05a0f9d0b 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp
index 8bf369957dc67..ed464b86a8634 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp
@@ -8,7 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp
index ab30a417567d7..7157988dafcc2 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp
index 529c78d43a6c4..290a05b0223c9 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp
index 78a7f41f5184f..04e1178c4eb68 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp
index 458bbc28bdb4e..e4eaffd0c5f19 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp
index 2a4def6087b2f..30259b67d275a 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp
index 0a3d7b4c91cc7..313ce67632105 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 
diff --git a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp
index dce94110cedee..41ce442ae5103 100644
--- a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp
@@ -9,7 +9,11 @@
 
 // UNSUPPORTED: c++03, c++11, c++14
 
-// XFAIL: dylib-has-no-bad_variant_access && !no-exceptions
+// Throwing bad_variant_access is supported starting in macosx10.13
+// XFAIL: with_system_cxx_lib=macosx10.12 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.11 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.10 && !no-exceptions
+// XFAIL: with_system_cxx_lib=macosx10.9 && !no-exceptions
 
 // 
 // template 
diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py
index 7cccc07997b3b..0ee446f46c51f 100644
--- a/libcxx/utils/gdb/libcxx/printers.py
+++ b/libcxx/utils/gdb/libcxx/printers.py
@@ -698,7 +698,7 @@ class StdMapPrinter(AbstractRBTreePrinter):
 
     def _init_cast_type(self, val_type):
         map_it_type = gdb.lookup_type(
-            str(val_type) + "::iterator").strip_typedefs()
+            str(val_type.strip_typedefs()) + "::iterator").strip_typedefs()
         tree_it_type = map_it_type.template_argument(0)
         node_ptr_type = tree_it_type.template_argument(1)
         return node_ptr_type
@@ -717,7 +717,7 @@ class StdSetPrinter(AbstractRBTreePrinter):
 
     def _init_cast_type(self, val_type):
         set_it_type = gdb.lookup_type(
-            str(val_type) + "::iterator").strip_typedefs()
+            str(val_type.strip_typedefs()) + "::iterator").strip_typedefs()
         node_ptr_type = set_it_type.template_argument(1)
         return node_ptr_type
 
diff --git a/libcxx/utils/google-benchmark/mingw.py b/libcxx/utils/google-benchmark/mingw.py
index 706ad559db9c7..0b69692ca2a40 100644
--- a/libcxx/utils/google-benchmark/mingw.py
+++ b/libcxx/utils/google-benchmark/mingw.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#!/usr/bin/env python
 # encoding: utf-8
 
 import argparse
diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py
index 99a966dbfdfde..d54ee8fa32913 100644
--- a/libcxx/utils/libcxx/test/config.py
+++ b/libcxx/utils/libcxx/test/config.py
@@ -20,6 +20,7 @@
 from libcxx.test.target_info import make_target_info
 import libcxx.util
 import libcxx.test.features
+import libcxx.test.newconfig
 import libcxx.test.params
 
 def loadSiteConfig(lit_config, config, param_name, env_name):
@@ -118,8 +119,8 @@ def make_static_lib_name(self, name):
             return 'lib' + name + '.a'
 
     def configure(self):
-        self.configure_target_info()
-        self.configure_executor()
+        self.target_info = make_target_info(self)
+        self.executor = self.get_lit_conf('executor')
         self.configure_cxx()
         self.configure_triple()
         self.configure_deployment()
@@ -139,35 +140,20 @@ def configure(self):
         self.configure_modules()
         self.configure_substitutions()
         self.configure_features()
-        self.configure_new_params()
-        self.configure_new_features()
-
-    def configure_new_features(self):
-        supportedFeatures = [f for f in libcxx.test.features.features if f.isSupported(self.config)]
-        for feature in supportedFeatures:
-            feature.enableIn(self.config)
 
-    def configure_new_params(self):
-        for param in libcxx.test.params.parameters:
-            feature = param.getFeature(self.config, self.lit_config.params)
-            if feature:
-                feature.enableIn(self.config)
+        libcxx.test.newconfig.configure(
+            libcxx.test.params.DEFAULT_PARAMETERS,
+            libcxx.test.features.DEFAULT_FEATURES,
+            self.config,
+            self.lit_config
+        )
 
     def print_config_info(self):
-        # Print the final compile and link flags.
-        self.lit_config.note('Using compiler: %s' % self.cxx.path)
-        self.lit_config.note('Using flags: %s' % self.cxx.flags)
         if self.cxx.use_modules:
             self.lit_config.note('Using modules flags: %s' %
                                  self.cxx.modules_flags)
-        self.lit_config.note('Using compile flags: %s'
-                             % self.cxx.compile_flags)
         if len(self.cxx.warning_flags):
             self.lit_config.note('Using warnings: %s' % self.cxx.warning_flags)
-        self.lit_config.note('Using link flags: %s' % self.cxx.link_flags)
-        # Print as list to prevent "set([...])" from being printed.
-        self.lit_config.note('Using available_features: %s' %
-                             list(sorted(self.config.available_features)))
         show_env_vars = {}
         for k,v in self.exec_env.items():
             if k not in os.environ or os.environ[k] != v:
@@ -185,13 +171,6 @@ def get_test_format(self):
             self.executor,
             exec_env=self.exec_env)
 
-    def configure_executor(self):
-        self.executor = self.get_lit_conf('executor')
-        self.lit_config.note("Using executor: {}".format(self.executor))
-
-    def configure_target_info(self):
-        self.target_info = make_target_info(self)
-
     def configure_cxx(self):
         # Gather various compiler parameters.
         cxx = self.get_lit_conf('cxx_under_test')
@@ -319,7 +298,6 @@ def configure_default_compile_flags(self):
         self.configure_compile_flags_header_includes()
         self.target_info.add_cxx_compile_flags(self.cxx.compile_flags)
         # Configure feature flags.
-        self.configure_compile_flags_rtti()
         enable_32bit = self.get_lit_bool('enable_32bit', False)
         if enable_32bit:
             self.cxx.flags += ['-m32']
@@ -406,12 +384,6 @@ def configure_config_site_header(self):
             return
         self.cxx.compile_flags += ['-include', config_site_header]
 
-    def configure_compile_flags_rtti(self):
-        enable_rtti = self.get_lit_bool('enable_rtti', True)
-        if not enable_rtti:
-            self.config.available_features.add('-fno-rtti')
-            self.cxx.compile_flags += ['-fno-rtti', '-D_LIBCPP_NO_RTTI']
-
     def configure_link_flags(self):
         # Configure library path
         self.configure_link_flags_cxx_library_path()
@@ -737,11 +709,8 @@ def configure_deployment(self):
         arch = self.get_lit_conf('arch')
         if not arch:
             arch = self.cxx.getTriple().split('-', 1)[0]
-            self.lit_config.note("inferred arch as: %r" % arch)
 
-        inferred_platform, name, version = self.target_info.get_platform()
-        if inferred_platform:
-            self.lit_config.note("inferred platform as: %r" % (name + version))
+        _, name, version = self.target_info.get_platform()
         self.config.deployment = (arch, name, version)
 
         # Set the target triple for use by lit.
@@ -749,27 +718,9 @@ def configure_deployment(self):
         self.lit_config.note(
             "computed target_triple as: %r" % self.config.target_triple)
 
-        # If we're testing a system libc++ as opposed to the upstream LLVM one,
-        # take the version of the system libc++ into account to compute which
-        # features are enabled/disabled. Otherwise, disable availability markup,
+        # If we're testing the upstream LLVM libc++, disable availability markup,
         # which is not relevant for non-shipped flavors of libc++.
-        if self.use_system_cxx_lib:
-            # Dylib support for shared_mutex was added in macosx10.12.
-            if name == 'macosx' and version in ('10.%s' % v for v in range(9, 12)):
-                self.config.available_features.add('dylib-has-no-shared_mutex')
-                self.lit_config.note("shared_mutex is not supported by the deployment target")
-            # Throwing bad_optional_access, bad_variant_access and bad_any_cast is
-            # supported starting in macosx10.13.
-            if name == 'macosx' and version in ('10.%s' % v for v in range(9, 13)):
-                self.config.available_features.add('dylib-has-no-bad_optional_access')
-                self.lit_config.note("throwing bad_optional_access is not supported by the deployment target")
-
-                self.config.available_features.add('dylib-has-no-bad_variant_access')
-                self.lit_config.note("throwing bad_variant_access is not supported by the deployment target")
-
-                self.config.available_features.add('dylib-has-no-bad_any_cast')
-                self.lit_config.note("throwing bad_any_cast is not supported by the deployment target")
-        else:
+        if not self.use_system_cxx_lib:
             self.cxx.compile_flags += ['-D_LIBCPP_DISABLE_AVAILABILITY']
 
     def configure_env(self):
diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index e0e09e6bcac47..2c54921844b2a 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -227,6 +227,19 @@ def isSupported(self, config):
     """
     return self._isSupported(config)
 
+  def getName(self, config):
+    """
+    Return the name of the feature.
+
+    It is an error to call `f.getName(cfg)` if the feature `f` is not supported.
+    """
+    assert self.isSupported(config), \
+      "Trying to get the name of a feature that is not supported in the given configuration"
+    name = self._name(config) if callable(self._name) else self._name
+    if not isinstance(name, str):
+      raise ValueError("Feature did not resolve to a name that's a string, got {}".format(name))
+    return name
+
   def enableIn(self, config):
     """
     Enable a feature in a TestingConfig.
@@ -249,11 +262,7 @@ def enableIn(self, config):
     if self._linkFlag:
       linkFlag = self._linkFlag(config) if callable(self._linkFlag) else self._linkFlag
       config.substitutions = addTo(config.substitutions, '%{link_flags}', linkFlag)
-
-    name = self._name(config) if callable(self._name) else self._name
-    if not isinstance(name, str):
-      raise ValueError("Feature did not resolve to a name that's a string, got {}".format(name))
-    config.available_features.add(name)
+    config.available_features.add(self.getName(config))
 
 
 def _str_to_bool(s):
diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py
index f3d8e782be8e1..6a16ca851d3f9 100644
--- a/libcxx/utils/libcxx/test/features.py
+++ b/libcxx/utils/libcxx/test/features.py
@@ -13,7 +13,7 @@
 _isAppleClang = lambda cfg: '__apple_build_version__' in compilerMacros(cfg)
 _isGCC        = lambda cfg: '__GNUC__' in compilerMacros(cfg) and '__clang__' not in compilerMacros(cfg)
 
-features = [
+DEFAULT_FEATURES = [
   Feature(name='fcoroutines-ts', compileFlag='-fcoroutines-ts',
           when=lambda cfg: hasCompileFlag(cfg, '-fcoroutines-ts') and
                            featureTestMacros(cfg, flags='-fcoroutines-ts').get('__cpp_coroutines', 0) >= 201703),
@@ -76,7 +76,7 @@
   '_LIBCPP_ABI_UNSTABLE': 'libcpp-abi-unstable'
 }
 for macro, feature in macros.items():
-  features += [
+  DEFAULT_FEATURES += [
     Feature(name=lambda cfg, m=macro, f=feature: f + (
               '={}'.format(compilerMacros(cfg)[m]) if compilerMacros(cfg)[m] else ''
             ),
@@ -104,14 +104,14 @@
   'cs_CZ.ISO8859-2': ['cs_CZ.ISO8859-2', 'Czech_Czech Republic.1250']
 }
 for locale, alts in locales.items():
-  features += [
+  DEFAULT_FEATURES += [
     Feature(name='locale.{}'.format(locale),
             when=lambda cfg: any(hasLocale(cfg, alt) for alt in alts))
   ]
 
 
 # Add features representing the platform name: darwin, linux, windows, etc...
-features += [
+DEFAULT_FEATURES += [
   Feature(name='darwin', when=lambda cfg: '__APPLE__' in compilerMacros(cfg)),
   Feature(name='windows', when=lambda cfg: '_WIN32' in compilerMacros(cfg)),
   Feature(name='linux', when=lambda cfg: '__linux__' in compilerMacros(cfg)),
diff --git a/libcxx/utils/libcxx/test/newconfig.py b/libcxx/utils/libcxx/test/newconfig.py
new file mode 100644
index 0000000000000..8996484ba20ba
--- /dev/null
+++ b/libcxx/utils/libcxx/test/newconfig.py
@@ -0,0 +1,36 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+def _getSubstitution(substitution, config):
+  for (orig, replacement) in config.substitutions:
+    if orig == substitution:
+      return replacement
+  raise ValueError('Substitution {} is not in the config.'.format(substitution))
+
+def configure(parameters, features, config, lit_config):
+  # Apply parameters to the configuration first, since parameters are things
+  # that we request explicitly and which might influence what features are
+  # implicitly made available next.
+  for param in parameters:
+    feature = param.getFeature(config, lit_config.params)
+    if feature:
+      feature.enableIn(config)
+      lit_config.note("Enabling Lit feature '{}' as a result of parameter '{}'".format(feature.getName(config), param.name))
+
+  # Then, apply the automatically-detected features.
+  printFeatures = []
+  for feature in features:
+    if feature.isSupported(config):
+      feature.enableIn(config)
+      printFeatures.append(feature.getName(config))
+  printFeatures = ["'{}'".format(f) for f in sorted(printFeatures)]
+  lit_config.note("Enabling implicitly detected Lit features {}".format(', '.join(printFeatures)))
+
+  # Print the basic substitutions
+  for sub in ('%{cxx}', '%{flags}', '%{compile_flags}', '%{link_flags}', '%{exec}'):
+    lit_config.note("Using {} substitution: '{}'".format(sub, _getSubstitution(sub, config)))
diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py
index 864a5108fc098..a9431ec073f8d 100644
--- a/libcxx/utils/libcxx/test/params.py
+++ b/libcxx/utils/libcxx/test/params.py
@@ -10,7 +10,7 @@
 
 _allStandards = ['c++98', 'c++03', 'c++11', 'c++14', 'c++17', 'c++2a']
 
-parameters = [
+DEFAULT_PARAMETERS = [
   # Core parameters of the test suite
   Parameter(name='std', choices=_allStandards, type=str,
             help="The version of the standard to compile the test suite with.",
@@ -24,6 +24,11 @@
             feature=lambda exceptions: None if exceptions else
               Feature(name='no-exceptions', compileFlag='-fno-exceptions')),
 
+  Parameter(name='enable_rtti', choices=[True, False], type=bool, default=True,
+            help="Whether to enable RTTI when compiling the test suite.",
+            feature=lambda rtti: None if rtti else
+              Feature(name='-fno-rtti', compileFlag='-fno-rtti')),
+
   Parameter(name='stdlib', choices=['libc++', 'libstdc++', 'msvc'], type=str, default='libc++',
             help="The C++ Standard Library implementation being tested.",
             feature=lambda stdlib: Feature(name=stdlib)),
diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html
index 6a2f2f44d1456..ad9bb36859cb3 100644
--- a/libcxx/www/cxx2a_status.html
+++ b/libcxx/www/cxx2a_status.html
@@ -73,7 +73,7 @@ 

Paper Status

P0777R1LWGTreating Unnecessary decayAlbuquerqueComplete7.0 P0122R7LWG<span>JacksonvilleComplete7.0 P0355R7LWGExtending chrono to Calendars and Time ZonesJacksonvilleIn progress - P0551R3LWGThou Shalt Not Specialize std Function Templates!Jacksonville + P0551R3LWGThou Shalt Not Specialize std Function Templates!JacksonvilleComplete11.0 P0753R2LWGManipulators for C++ Synchronized Buffered OstreamJacksonville P0754R2LWG<version>JacksonvilleComplete7.0 P0809R0LWGComparing Unordered ContainersJacksonville diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 8881a5018dc43..e4e20d950b890 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -151,13 +151,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ) -set(LIBCXXABI_COMPILER ${CMAKE_CXX_COMPILER}) set(LIBCXXABI_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LIBCXXABI_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION - ${PACKAGE_VERSION}) - if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) set(LIBCXXABI_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) set(LIBCXXABI_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 42bec421d2be7..0503569115286 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -20,7 +20,9 @@ set(LIBCXXABI_SOURCES ) if (LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS) - list(APPEND LIBCXXABI_SOURCES stdlib_new_delete.cpp) + list(APPEND LIBCXXABI_SOURCES + stdlib_new_delete.cpp + ) endif() if (LIBCXXABI_ENABLE_EXCEPTIONS) diff --git a/libcxxabi/test/guard_threaded_test.pass.cpp b/libcxxabi/test/guard_threaded_test.pass.cpp index 0dc911c73a495..d562b4851afe5 100644 --- a/libcxxabi/test/guard_threaded_test.pass.cpp +++ b/libcxxabi/test/guard_threaded_test.pass.cpp @@ -36,7 +36,7 @@ constexpr int TestSamples = 50; void BusyWait() { - std::this_thread::yield(); + 
std::this_thread::yield(); } void YieldAfterBarrier() { diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index b9b2b6e90c6a9..45fb0f5d7afcb 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ b/libcxxabi/test/libcxxabi/test/config.py @@ -83,6 +83,3 @@ def configure_compile_flags_header_includes(self): self.lit_config.fatal("libunwind_headers='%s' is not a directory." % libunwind_headers) self.cxx.compile_flags += ['-I' + libunwind_headers] - - def configure_compile_flags_rtti(self): - pass diff --git a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index 75fde7ee92507..06d5706da7d24 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -3,7 +3,7 @@ import os import site -config.cxx_under_test = "@LIBCXXABI_COMPILER@" +config.cxx_under_test = "@CMAKE_CXX_COMPILER@" config.project_obj_root = "@CMAKE_BINARY_DIR@" config.libcxxabi_src_root = "@LIBCXXABI_SOURCE_DIR@" config.libcxxabi_obj_root = "@LIBCXXABI_BINARY_DIR@" diff --git a/libcxxabi/test/test_exception_address_alignment.pass.cpp b/libcxxabi/test/test_exception_address_alignment.pass.cpp index 5847f23caa000..92652f464b8bd 100644 --- a/libcxxabi/test/test_exception_address_alignment.pass.cpp +++ b/libcxxabi/test/test_exception_address_alignment.pass.cpp @@ -11,11 +11,10 @@ // The header provided in the SDK of older Xcodes used to provide // an incorrectly aligned _Unwind_Exception type. That causes these tests to -// fail with those SDKs. Note that we use the AppleClang version as a cheap -// proxy for the SDK version. -// XFAIL: apple-clang-11 && libcxxabi-has-system-unwinder -// XFAIL: apple-clang-10 && libcxxabi-has-system-unwinder -// XFAIL: apple-clang-9 && libcxxabi-has-system-unwinder +// fail with those SDKs. +// FIXME: We mark the test as unsupported on Apple until we have a Lit feature +// representing the SDK version. 
+// UNSUPPORTED: darwin // Test that the address of the exception object is properly aligned as required // by the relevant ABI diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 7065112627a25..b50550dc376ee 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -83,7 +83,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B endif() set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 11.0.0git) + set(PACKAGE_VERSION 12.0.0git) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") @@ -182,13 +182,9 @@ set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) -set(LIBUNWIND_COMPILER ${CMAKE_CXX_COMPILER}) set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION - ${PACKAGE_VERSION}) - if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) diff --git a/libunwind/docs/conf.py b/libunwind/docs/conf.py index 70fb71fd87330..6217ead0caf28 100644 --- a/libunwind/docs/conf.py +++ b/libunwind/docs/conf.py @@ -48,9 +48,9 @@ # built documents. # # The short X.Y version. -version = '11.0' +version = '12.0' # The full version, including alpha/beta/rc tags. -release = '11.0' +release = '12.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index a4564cb673286..764aaa3489f26 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -290,11 +290,11 @@ inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) { if (p == pend) _LIBUNWIND_ABORT("truncated sleb128 expression"); byte = *p++; - result |= ((byte & 0x7f) << bit); + result |= (uint64_t)(byte & 0x7f) << bit; bit += 7; } while (byte & 0x80); // sign extend negative numbers - if ((byte & 0x40) != 0) + if ((byte & 0x40) != 0 && bit < 64) result |= (-1ULL) << bit; addr = (pint_t) p; return result; diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp index d05ac468367f9..c98c4f92a6ad3 100644 --- a/libunwind/src/DwarfParser.hpp +++ b/libunwind/src/DwarfParser.hpp @@ -336,7 +336,8 @@ const char *CFI_Parser::parseCIE(A &addressSpace, pint_t cie, // parse data alignment factor cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd); // parse return address register - uint64_t raReg = addressSpace.getULEB128(p, cieContentEnd); + uint64_t raReg = (version == 1) ? 
addressSpace.get8(p++) + : addressSpace.getULEB128(p, cieContentEnd); assert(raReg < 255 && "return address register too large"); cieInfo->returnAddressRegister = (uint8_t)raReg; // parse augmentation data based on augmentation string diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py index 31f6148879c5f..977f9a0fb3f93 100644 --- a/libunwind/test/libunwind/test/config.py +++ b/libunwind/test/libunwind/test/config.py @@ -59,9 +59,6 @@ def configure_compile_flags_header_includes(self): % libunwind_headers) self.cxx.compile_flags += ['-I' + libunwind_headers] - def configure_compile_flags_rtti(self): - pass - def configure_link_flags_cxx_library(self): # libunwind tests should not link with libc++ pass diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index d0f0e08fc9263..30a996cf37837 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -3,7 +3,7 @@ import os import site -config.cxx_under_test = "@LIBUNWIND_COMPILER@" +config.cxx_under_test = "@CMAKE_CXX_COMPILER@" config.project_obj_root = "@CMAKE_BINARY_DIR@" config.libunwind_src_root = "@LIBUNWIND_SOURCE_DIR@" config.libunwind_obj_root = "@LIBUNWIND_BINARY_DIR@" diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index 4592ace373efa..796f7a82a3de1 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(COFFOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldCOFF Chunks.cpp DebugTypes.cpp @@ -48,5 +44,5 @@ add_lld_library(lldCOFF DEPENDS COFFOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 4790b0166799c..b8c488f26908a 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -377,7 +377,8 @@ static PrecompSource 
*findObjByName(StringRef fileNameOnly) { return nullptr; } -Expected findPrecompMap(ObjFile *file, PrecompRecord &pr) { +static Expected findPrecompMap(ObjFile *file, + PrecompRecord &pr) { // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, // the paths embedded in the OBJs are in the Windows format. diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index ffa0a0006f0ed..3bcc1777f7ac8 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1864,6 +1864,10 @@ void Writer::sortExceptionTable() { uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); if (config->machine == AMD64) { struct Entry { ulittle32_t begin, end, unwind; }; + if ((end - begin) % sizeof(Entry) != 0) { + fatal("unexpected .pdata size: " + Twine(end - begin) + + " is not a multiple of " + Twine(sizeof(Entry))); + } parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); @@ -1871,6 +1875,10 @@ void Writer::sortExceptionTable() { } if (config->machine == ARMNT || config->machine == ARM64) { struct Entry { ulittle32_t begin, unwind; }; + if ((end - begin) % sizeof(Entry) != 0) { + fatal("unexpected .pdata size: " + Twine(end - begin) + + " is not a multiple of " + Twine(sizeof(Entry))); + } parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 53649032bd987..212328b784c5b 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc) @@ -57,5 +53,5 @@ add_lld_library(lldCommon ${LLVM_PTHREAD_LIB} DEPENDS - ${tablegen_deps} + intrinsics_gen ) diff --git 
a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp index 206b892f0a69c..671b352a3f6bc 100644 --- a/lld/Common/Filesystem.cpp +++ b/lld/Common/Filesystem.cpp @@ -40,6 +40,9 @@ using namespace lld; // This function spawns a background thread to remove the file. // The calling thread returns almost immediately. void lld::unlinkAsync(StringRef path) { + if (!sys::fs::exists(path) || !sys::fs::is_regular_file(path)) + return; + // Removing a file is async on windows. #if defined(_WIN32) // On Windows co-operative programs can be expected to open LLD's @@ -71,8 +74,7 @@ void lld::unlinkAsync(StringRef path) { } sys::fs::remove(path); #else - if (parallel::strategy.ThreadsRequested == 1 || !sys::fs::exists(path) || - !sys::fs::is_regular_file(path)) + if (parallel::strategy.ThreadsRequested == 1) return; // We cannot just remove path from a different thread because we are now going diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index 9b733837dd5d0..4513a970b32d7 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -54,11 +54,131 @@ AVR::AVR() { noneRel = R_AVR_NONE; } RelExpr AVR::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { - return R_ABS; + switch (type) { + case R_AVR_7_PCREL: + case R_AVR_13_PCREL: + return R_PC; + default: + return R_ABS; + } +} + +static void writeLDI(uint8_t *loc, uint64_t val) { + write16le(loc, (read16le(loc) & 0xf0f0) | (val & 0xf0) << 4 | (val & 0x0f)); } void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { switch (rel.type) { + case R_AVR_8: + checkUInt(loc, val, 8, rel); + *loc = val; + break; + case R_AVR_16: + // Note: this relocation is often used between code and data space, which + // are 0x800000 apart in the output ELF file. The bitmask cuts off the high + // bit. 
+ write16le(loc, val & 0xffff); + break; + case R_AVR_16_PM: + checkAlignment(loc, val, 2, rel); + checkUInt(loc, val >> 1, 16, rel); + write16le(loc, val >> 1); + break; + case R_AVR_32: + checkUInt(loc, val, 32, rel); + write32le(loc, val); + break; + + case R_AVR_LDI: + checkUInt(loc, val, 8, rel); + writeLDI(loc, val & 0xff); + break; + + case R_AVR_LO8_LDI_NEG: + writeLDI(loc, -val & 0xff); + break; + case R_AVR_LO8_LDI: + writeLDI(loc, val & 0xff); + break; + case R_AVR_HI8_LDI_NEG: + writeLDI(loc, (-val >> 8) & 0xff); + break; + case R_AVR_HI8_LDI: + writeLDI(loc, (val >> 8) & 0xff); + break; + case R_AVR_HH8_LDI_NEG: + writeLDI(loc, (-val >> 16) & 0xff); + break; + case R_AVR_HH8_LDI: + writeLDI(loc, (val >> 16) & 0xff); + break; + case R_AVR_MS8_LDI_NEG: + writeLDI(loc, (-val >> 24) & 0xff); + break; + case R_AVR_MS8_LDI: + writeLDI(loc, (val >> 24) & 0xff); + break; + + case R_AVR_LO8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 17) & 0xff); + break; + + case R_AVR_LO8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 17) & 0xff); + break; + + case R_AVR_PORT5: + checkUInt(loc, val, 5, rel); + write16le(loc, (read16le(loc) & 0xff07) | (val << 3)); + break; + case R_AVR_PORT6: + checkUInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xf9f0) | (val & 0x30) << 5 | (val & 0x0f)); + break; + + // Since every jump destination is word aligned we gain an extra bit + case R_AVR_7_PCREL: { + checkInt(loc, val, 7, rel); + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 
2) >> 1; + write16le(loc, (read16le(loc) & 0xfc07) | ((target & 0x7f) << 3)); + break; + } + case R_AVR_13_PCREL: { + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xf000) | (target & 0xfff)); + break; + } + + case R_AVR_6: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xd3f8) | (val & 0x20) << 8 | + (val & 0x18) << 7 | (val & 0x07)); + break; + case R_AVR_6_ADIW: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xff30) | (val & 0x30) << 2 | (val & 0x0F)); + break; + case R_AVR_CALL: { uint16_t hi = val >> 17; uint16_t lo = val >> 1; diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index b89f4436288aa..f85d0fb9f55e3 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldELF AArch64ErrataFix.cpp Arch/AArch64.cpp @@ -66,5 +62,5 @@ add_lld_library(lldELF DEPENDS ELFOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 301f11359823b..4637a3b306daf 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1944,9 +1944,9 @@ template void LinkerDriver::link(opt::InputArgList &args) { handleUndefinedGlob(pat); // Mark -init and -fini symbols so that the LTO doesn't eliminate them. 
- if (Symbol *sym = symtab->find(config->init)) + if (Symbol *sym = dyn_cast_or_null(symtab->find(config->init))) sym->isUsedInRegularObj = true; - if (Symbol *sym = symtab->find(config->fini)) + if (Symbol *sym = dyn_cast_or_null(symtab->find(config->fini))) sym->isUsedInRegularObj = true; // If any of our inputs are bitcode files, the LTO code generator may create diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index e59bf626be501..c2f1830a981b8 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -632,6 +632,8 @@ void ObjFile::initializeSections(bool ignoreComdats) { break; case SHT_SYMTAB: case SHT_STRTAB: + case SHT_REL: + case SHT_RELA: case SHT_NULL: break; default: @@ -639,11 +641,21 @@ void ObjFile::initializeSections(bool ignoreComdats) { } } - // This block handles SHF_LINK_ORDER. + // We have a second loop. It is used to: + // 1) handle SHF_LINK_ORDER sections. + // 2) create SHT_REL[A] sections. In some cases the section header index of a + // relocation section may be smaller than that of the relocated section. In + // such cases, the relocation section would attempt to reference a target + // section that has not yet been created. For simplicity, delay creation of + // relocation sections until now. for (size_t i = 0, e = objSections.size(); i < e; ++i) { if (this->sections[i] == &InputSection::discarded) continue; const Elf_Shdr &sec = objSections[i]; + + if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) + this->sections[i] = createInputSection(sec); + if (!(sec.sh_flags & SHF_LINK_ORDER)) continue; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 2a3b0042162eb..4dfb387e4e622 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -204,7 +204,7 @@ static bool isArchString(StringRef s) { // entry (the one nearest to the front of the list.) // // The file can also have line comments that start with '#'. 
-void parseOrderFile(StringRef path) { +static void parseOrderFile(StringRef path) { Optional buffer = readFile(path); if (!buffer) { error("Could not read order file at " + path); diff --git a/lld/MinGW/CMakeLists.txt b/lld/MinGW/CMakeLists.txt index bb0fe4a3887d7..1dc04d73eca65 100644 --- a/lld/MinGW/CMakeLists.txt +++ b/lld/MinGW/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(MinGWOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldMinGW Driver.cpp @@ -19,5 +15,5 @@ add_lld_library(lldMinGW DEPENDS MinGWOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index e7f51a36a39df..f33b5e19502c6 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -251,6 +251,10 @@ bool mingw::link(ArrayRef argsArr, bool canExitEarly, add("-reproduce:" + StringRef(a->getValue())); if (auto *a = args.getLastArg(OPT_thinlto_cache_dir)) add("-lldltocache:" + StringRef(a->getValue())); + if (auto *a = args.getLastArg(OPT_file_alignment)) + add("-filealign:" + StringRef(a->getValue())); + if (auto *a = args.getLastArg(OPT_section_alignment)) + add("-align:" + StringRef(a->getValue())); if (auto *a = args.getLastArg(OPT_o)) add("-out:" + StringRef(a->getValue())); diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 6410e4c1901c5..3281951dc89dd 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -34,6 +34,7 @@ def exclude_all_symbols: F<"exclude-all-symbols">, HelpText<"Don't automatically export any symbols">; def export_all_symbols: F<"export-all-symbols">, HelpText<"Export all symbols even if a def file or dllexport attributes are used">; +defm file_alignment: Eq<"file-alignment", "Set file alignment">; def gc_sections: F<"gc-sections">, HelpText<"Remove unused sections">; def help: F<"help">, HelpText<"Print option help">; def icf: 
J<"icf=">, HelpText<"Identical code folding">; @@ -64,6 +65,7 @@ def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"">, HelpText<"Path to file to write output">; defm out_implib: Eq<"out-implib", "Import library name">; defm output_def: Eq<"output-def", "Output def file">; +defm section_alignment: Eq<"section-alignment", "Set section alignment">; def shared: F<"shared">, HelpText<"Build a shared object">; defm subs: Eq<"subsystem", "Specify subsystem">; def stack: S<"stack">; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index fe3de8306cd85..f50c3064f4744 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -1,19 +1,19 @@ ======================== -lld 11.0.0 Release Notes +lld 12.0.0 Release Notes ======================== .. contents:: :local: .. warning:: - These are in-progress notes for the upcoming LLVM 11.0.0 release. + These are in-progress notes for the upcoming LLVM 12.0.0 release. Release notes for previous releases can be found on `the Download Page `_. Introduction ============ -This document contains the release notes for the lld linker, release 11.0.0. +This document contains the release notes for the lld linker, release 12.0.0. Here we describe the status of lld, including major improvements from the previous release. All lld releases may be downloaded from the `LLVM releases web site `_. @@ -24,18 +24,12 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- -* New ``--time-trace`` option records a time trace file that can be viewed in - chrome://tracing. The file can be specified with ``--time-trace-file``. - Trace granularity can be specified with ``--time-trace-granularity``. - (`D71060 `_) * ... Breaking changes ---------------- -* One-dash form of some long option (``--thinlto-*``, ``--lto-*``, ``--shuffle-sections=``) - are no longer supported. -* ``--export-dynamic-symbol`` no longer implies ``-u``. +* ... 
COFF Improvements ----------------- diff --git a/lld/docs/conf.py b/lld/docs/conf.py index 7d4fc0c5ad75f..7867d9a7dddf3 100644 --- a/lld/docs/conf.py +++ b/lld/docs/conf.py @@ -48,9 +48,9 @@ # built documents. # # The short version. -version = '11' +version = '12' # The full version, including alpha/beta/rc tags. -release = '11' +release = '12' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/lld/lib/Core/CMakeLists.txt b/lld/lib/Core/CMakeLists.txt index 2d4d9ded08862..d5e507536b720 100644 --- a/lld/lib/Core/CMakeLists.txt +++ b/lld/lib/Core/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldCore DefinedAtom.cpp Error.cpp @@ -24,5 +20,5 @@ add_lld_library(lldCore ${LLVM_PTHREAD_LIB} DEPENDS - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/test/COFF/pdata-arm64-bad.yaml b/lld/test/COFF/pdata-arm64-bad.yaml new file mode 100644 index 0000000000000..d6b4967457952 --- /dev/null +++ b/lld/test/COFF/pdata-arm64-bad.yaml @@ -0,0 +1,89 @@ +# RUN: yaml2obj < %s > %t.obj +# RUN: not lld-link /out:%t.exe /entry:func1 /subsystem:console %t.obj 2>&1 | FileCheck %s + +# This file is like pdata-arm64.yaml, except that .pdata has been extended with +# 4 bytes. This can happen due to for example bad assembler input. Check that +# lld errors gracefully instead of crashing. 
+ +# CHECK: unexpected .pdata size: 20 is not a multiple of 8 + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_ARM64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: ff4300d1f37b00a9f303012a00000094e003132a00000094f37b40a9ff430091c0035fd6f353bea9fe0b00f9f303012af403022a00000094e003132a00000094e003142a00000094fe0b40f9f353c2a8c0035fd6c0035fd6 + Relocations: + - VirtualAddress: 12 + SymbolName: func3 + Type: IMAGE_REL_ARM64_BRANCH26 + - VirtualAddress: 20 + SymbolName: func3 + Type: IMAGE_REL_ARM64_BRANCH26 + - VirtualAddress: 52 + SymbolName: func3 + Type: IMAGE_REL_ARM64_BRANCH26 + - VirtualAddress: 60 + SymbolName: func3 + Type: IMAGE_REL_ARM64_BRANCH26 + - VirtualAddress: 68 + SymbolName: func3 + Type: IMAGE_REL_ARM64_BRANCH26 + - Name: .pdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: 0000000031002201000000002500a10000000000 + Relocations: + - VirtualAddress: 0 + SymbolName: func2 + Type: IMAGE_REL_ARM64_ADDR32NB + - VirtualAddress: 8 + SymbolName: func1 + Type: IMAGE_REL_ARM64_ADDR32NB +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 57 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 1 + - Name: .pdata + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 20 + NumberOfRelocations: 2 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 2 + - Name: func1 + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: func2 + Value: 36 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: 
IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: func3 + Value: 84 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/lld/test/ELF/avr-reloc.s b/lld/test/ELF/avr-reloc.s new file mode 100644 index 0000000000000..49f78044068b9 --- /dev/null +++ b/lld/test/ELF/avr-reloc.s @@ -0,0 +1,84 @@ +; REQUIRES: avr +; RUN: llvm-mc -filetype=obj -triple=avr -mcpu=atmega328p %s -o %t.o +; RUN: ld.lld %t.o --defsym=a=0x12345678 --defsym=b=30 -o %t +; RUN: llvm-objdump -d --print-imm-hex %t | FileCheck %s +; RUN: llvm-objdump -s %t | FileCheck --check-prefix=HEX %s + +.section .LDI,"ax",@progbits +; CHECK-LABEL: section .LDI: +; CHECK: ldi r20, 0x78 +; CHECK-NEXT: ldi r20, 0x56 +; CHECK-NEXT: ldi r20, 0x34 +; CHECK-NEXT: ldi r20, 0x12 +; CHECK-NEXT: ldi r20, 0x3c +; CHECK-NEXT: ldi r20, 0x2b +; CHECK-NEXT: ldi r20, 0x1a +; CHECK-NEXT: ldi r20, 0xff +ldi r20, lo8(a) ; R_AVR_LO8_LDI +ldi r20, hi8(a) ; R_AVR_HI8_LDI +ldi r20, hh8(a) ; R_AVR_HH8_LDI +ldi r20, hhi8(a) ; R_AVR_MS8_LDI + +ldi r20, pm_lo8(a) ; R_AVR_LO8_LDI_PM +ldi r20, pm_hi8(a) ; R_AVR_HI8_LDI_PM +ldi r20, pm_hh8(a) ; R_AVR_HH8_LDI_PM + +ldi r20, b+225 + +.section .LDI_NEG,"ax",@progbits +; CHECK-LABEL: section .LDI_NEG: +; CHECK: ldi r20, 0x88 +; CHECK-NEXT: ldi r20, 0xa9 +; CHECK-NEXT: ldi r20, 0xcb +; CHECK-NEXT: ldi r20, 0xed +; CHECK-NEXT: ldi r20, 0xc4 +; CHECK-NEXT: ldi r20, 0xd4 +; CHECK-NEXT: ldi r20, 0xe5 +ldi r20, lo8(-(a)) ; R_AVR_LO8_LDI_NEG +ldi r20, hi8(-(a)) ; R_AVR_HI8_LDI_NEG +ldi r20, hh8(-(a)) ; R_AVR_HH8_LDI_NEG +ldi r20, hhi8(-(a)) ; R_AVR_MS8_LDI_NEG + +ldi r20, pm_lo8(-(a)) ; R_AVR_LO8_LDI_PM_NEG +ldi r20, pm_hi8(-(a)) ; R_AVR_HI8_LDI_PM_NEG +ldi r20, pm_hh8(-(a)) ; R_AVR_HH8_LDI_PM_NEG + +;; The disassembler is not yet able to decode those opcodes +;; 9e 8e std Y+30, r9 +;; 9e 8c ldd r9, Y+30 +;; 4e 96 adiw r24, 0x1e +.section .SIX,"ax",@progbits +; HEX-LABEL: section .SIX: 
+; HEX-NEXT: 9e8e9e8c 4e96 +std Y+b, r9 ; R_AVR_6 +ldd r9, Y+b ; R_AVR_6 +adiw r24, b ; R_AVR_6_ADIW + +.section .PORT,"ax",@progbits +; CHECK-LABEL: section .PORT: +; CHECK: in r20, 0x1e +; CHECK-NEXT: sbic 0x1e, 0x1 +in r20, b ; R_AVR_PORT6 +sbic b, 1 ; R_AVR_PORT5 + +;; The disassembler is not yet able to decode those opcodes +;; 0f c0 rjmp .+30 +;; ee cf rjmp .-36 +;; 69 f0 breq .+26 +;; 61 f3 breq .-40 +.section .PCREL,"ax",@progbits +; HEX-LABEL: section .PCREL: +; HEX-NEXT: 0fc0eecf 69f061f3 +foo: +rjmp foo + 32 ; R_AVR_13_PCREL +rjmp foo - 32 ; R_AVR_13_PCREL +breq foo + 32 ; R_AVR_7_PCREL +breq foo - 32 ; R_AVR_7_PCREL + +.section .DATA,"ax",@progbits +; HEX-LABEL: section .DATA: +; HEX-NEXT: {{.*}} 1e1e000f 00785634 12 +.byte b ; R_AVR_8 +.short b ; R_AVR_16 +.short gs(b) ; R_AVR_16_PM +.long a ; R_AVR_32 diff --git a/lld/test/ELF/eh-frame-hdr-augmentation.s b/lld/test/ELF/eh-frame-hdr-augmentation.s index dbec54a1a8a22..9c9b862ae80d5 100644 --- a/lld/test/ELF/eh-frame-hdr-augmentation.s +++ b/lld/test/ELF/eh-frame-hdr-augmentation.s @@ -20,7 +20,8 @@ // CHECK-NEXT: DW_CFA_nop: // CHECK-NEXT: DW_CFA_nop: -// CHECK: 00000020 00000014 00000024 FDE cie=00000000 pc=00001014...00001014 +/// FIXME Handle relocation correctly +// CHECK: 00000020 00000014 00000024 FDE cie=00000000 pc=0000103c...0000103c // CHECK-NEXT: Format: DWARF32 // CHECK-NEXT: LSDA Address: 000000000000100b // CHECK-NEXT: DW_CFA_nop: diff --git a/lld/test/ELF/init-fini.s b/lld/test/ELF/init-fini.s index 40aa98e95cebe..a07d4e3122c4a 100644 --- a/lld/test/ELF/init-fini.s +++ b/lld/test/ELF/init-fini.s @@ -46,6 +46,14 @@ // NOENTRY-NOT: Name: _unknown // NOENTRY: ] +// Should not add entries for "_init" and "_fini" to the symbol table +// if the symbols are defined in non-fetched achive members. 
+// RUN: rm -f %t.a +// RUN: llvm-ar rcs %t.a %t +// RUN: ld.lld -shared -m elf_x86_64 -e _unknown %t.a -o %t.so +// RUN: llvm-nm %t.so | \ +// RUN: FileCheck %s --implicit-check-not=_init --implicit-check-not=_fini + .global _start,_init,_fini,_foo,_bar,_undef _start: _init = 0x11010 diff --git a/lld/test/ELF/invalid/invalid-e_shnum.test b/lld/test/ELF/invalid/invalid-e_shnum.test index 7c04c105d2920..4e622ac9bece1 100644 --- a/lld/test/ELF/invalid/invalid-e_shnum.test +++ b/lld/test/ELF/invalid/invalid-e_shnum.test @@ -10,6 +10,6 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Machine: EM_X86_64 - SHOff: 0 - SHNum: 0x1 - SHStrNdx: 0 + EShOff: 0 + EShNum: 0x1 + EShStrNdx: 0 diff --git a/lld/test/ELF/invalid/reloc-section-reordered.test b/lld/test/ELF/invalid/reloc-section-reordered.test deleted file mode 100644 index 91f25f61b7e6f..0000000000000 --- a/lld/test/ELF/invalid/reloc-section-reordered.test +++ /dev/null @@ -1,33 +0,0 @@ -# REQUIRES: x86 - -# RUN: yaml2obj %s -o %t.o -# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s -# CHECK: unsupported relocation reference - -## YAML below lists .rela.text before .text, we do not support it. 
- -!ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - OSABI: ELFOSABI_FREEBSD - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Type: SHT_REL - Name: .rela.text - Link: .symtab - Info: .text - AddressAlign: 0x04 - Relocations: - - Symbol: .text - Type: R_X86_64_NONE - - Type: SHT_PROGBITS - Name: .text - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - AddressAlign: 0x04 - Content: "FFFFFFFFFFFFFFFF" -Symbols: - - Name: .text - Type: STT_SECTION - Section: .text diff --git a/lld/test/ELF/ppc64-rel-so-local-calls.s b/lld/test/ELF/ppc64-rel-so-local-calls.s index 2bc89d554a022..3d2e0673c3a74 100644 --- a/lld/test/ELF/ppc64-rel-so-local-calls.s +++ b/lld/test/ELF/ppc64-rel-so-local-calls.s @@ -2,15 +2,11 @@ // RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o // RUN: ld.lld -shared %t.o -o %t.so -// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s +// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s -allow-empty --implicit-check-not={{.}} // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o // RUN: ld.lld -shared %t.o -o %t.so -// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s - - -// CHECK-NOT: foo -// CHECK-NOT: bar +// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s -allow-empty --implicit-check-not={{.}} .text .abiversion 2 diff --git a/lld/test/ELF/reloc-sec-before-relocated.test b/lld/test/ELF/reloc-sec-before-relocated.test new file mode 100644 index 0000000000000..a56231294a0ca --- /dev/null +++ b/lld/test/ELF/reloc-sec-before-relocated.test @@ -0,0 +1,38 @@ +## If the section header index of a SHT_REL[A] section is smaller than the +## section header index of the relocated section, we should handle it properly. +## Normally it is not what compilers would emit, but some custom tools might +## want to use this feature, which is not restricted by ELF gABI. +## GNU ld supports this as well. 
+ +# RUN: yaml2obj %s -DTYPE=SHT_RELA -o %t1.o +# RUN: ld.lld -shared %t1.o -o %t1 +# RUN: llvm-readelf --relocs %t1 | FileCheck %s + +# RUN: yaml2obj %s -DTYPE=SHT_REL -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t2 +# RUN: llvm-readelf --relocs %t2 | FileCheck %s + +## Check we handle the relocation properly. +# CHECK: Relocation section '.rela.dyn' at offset 0x238 contains 1 entries: +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: 00000000000022f0 0000000100000001 R_X86_64_64 0000000000000000 foo + 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .relx.data + Type: [[TYPE]] + Info: .data + Relocations: + - Symbol: foo + Type: R_X86_64_64 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_WRITE ] +Symbols: + - Name: foo + Binding: STB_GLOBAL diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 803bf34f1ec52..385822c7e1f70 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -244,3 +244,15 @@ DISABLE_RUNTIME_PSEUDO_RELOC: -runtime-pseudo-reloc:no RUN: ld.lld -### foo.o -m i386pe --thinlto-cache-dir=_foo | FileCheck -check-prefix=THINLTO_CACHEDIR %s THINLTO_CACHEDIR: -lldltocache:_foo + +RUN: ld.lld -### -m i386pep foo.o --file-alignment 0x1000 | FileCheck -check-prefix FILEALIGN %s +RUN: ld.lld -### -m i386pep foo.o -file-alignment 0x1000 | FileCheck -check-prefix FILEALIGN %s +RUN: ld.lld -### -m i386pep foo.o --file-alignment=0x1000 | FileCheck -check-prefix FILEALIGN %s +RUN: ld.lld -### -m i386pep foo.o -file-alignment=0x1000 | FileCheck -check-prefix FILEALIGN %s +FILEALIGN: -filealign:0x1000 + +RUN: ld.lld -### -m i386pep foo.o --section-alignment 0x2000 | FileCheck -check-prefix ALIGN %s +RUN: ld.lld -### -m i386pep foo.o -section-alignment 0x2000 | FileCheck -check-prefix ALIGN %s +RUN: ld.lld -### -m i386pep foo.o --section-alignment=0x2000 | FileCheck -check-prefix ALIGN %s +RUN: ld.lld -### 
-m i386pep foo.o -section-alignment=0x2000 | FileCheck -check-prefix ALIGN %s +ALIGN: -align:0x2000 diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp index aad5f8afcfdc3..fbf18a8d9e007 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp @@ -75,7 +75,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -106,7 +106,7 @@ TEST(BinaryReaderTest, empty_obj_x86) { fromBinary(fileBytes, sizeof(fileBytes), "i386"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -137,7 +137,7 @@ TEST(BinaryReaderTest, empty_obj_ppc) { fromBinary(fileBytes, sizeof(fileBytes), "ppc"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -168,7 +168,7 @@ TEST(BinaryReaderTest, empty_obj_armv7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - 
EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -182,7 +182,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -191,7 +191,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f2->fileType), MH_OBJECT); - EXPECT_EQ((int)(f2->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); EXPECT_TRUE(f2->undefinedSymbols.empty()); @@ -268,7 +268,7 @@ TEST(BinaryReaderTest, hello_obj_x86_64) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -393,7 +393,7 @@ TEST(BinaryReaderTest, hello_obj_x86) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -525,7 +525,7 @@ 
TEST(BinaryReaderTest, hello_obj_armv7) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -669,7 +669,7 @@ TEST(BinaryReaderTest, hello_obj_ppc) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); diff --git a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp index 6ceb197b4b84a..dbfe3a051811a 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp @@ -50,7 +50,7 @@ TEST(ObjectFileYAML, empty_ppc) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -66,7 +66,7 @@ TEST(ObjectFileYAML, empty_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -82,7 +82,7 @@ TEST(ObjectFileYAML,
empty_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -98,7 +98,7 @@ TEST(ObjectFileYAML, empty_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -114,7 +114,7 @@ TEST(ObjectFileYAML, empty_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -130,7 +130,7 @@ TEST(ObjectFileYAML, empty_armv7s) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7s); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -143,7 +143,7 @@ TEST(ObjectFileYAML, roundTrip) { NormalizedFile f; f.arch = lld::MachOLinkingContext::arch_x86_64; f.fileType = llvm::MachO::MH_OBJECT; - f.flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + f.flags = (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; f.os = lld::MachOLinkingContext::OS::macOSX; toYAML(f, 
intermediate); } @@ -151,7 +151,7 @@ TEST(ObjectFileYAML, roundTrip) { std::unique_ptr f2 = fromYAML(intermediate); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f2->fileType), llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f2->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->sections.empty()); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); @@ -275,7 +275,7 @@ TEST(ObjectFileYAML, hello_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -405,7 +405,7 @@ TEST(ObjectFileYAML, hello_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -533,7 +533,7 @@ TEST(ObjectFileYAML, hello_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -673,7 +673,7 @@ TEST(ObjectFileYAML, hello_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); 
EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; diff --git a/lld/wasm/CMakeLists.txt b/lld/wasm/CMakeLists.txt index d2ba862c1e4a0..cd46f0a826ac9 100644 --- a/lld/wasm/CMakeLists.txt +++ b/lld/wasm/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(WasmOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldWasm Driver.cpp InputChunks.cpp @@ -37,5 +33,5 @@ add_lld_library(lldWasm DEPENDS WasmOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index cae2852baf86b..e8d018f09bf6e 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -37,7 +37,7 @@ struct Configuration { bool importMemory; bool sharedMemory; bool importTable; - bool is64; + llvm::Optional is64; bool mergeDataSegments; bool pie; bool printGcSections; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index d0805bf3b3036..7307aaa3f7be1 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -380,7 +380,6 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, config->shared); // Parse wasm32/64. - config->is64 = false; if (auto *arg = args.getLastArg(OPT_m)) { StringRef s = arg->getValue(); if (s == "wasm32") @@ -528,7 +527,7 @@ createUndefinedGlobal(StringRef name, llvm::wasm::WasmGlobalType *type) { static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable, int value) { llvm::wasm::WasmGlobal wasmGlobal; - if (config->is64) { + if (config->is64.getValueOr(false)) { wasmGlobal.Type = {WASM_TYPE_I64, isMutable}; wasmGlobal.InitExpr.Value.Int64 = value; wasmGlobal.InitExpr.Opcode = WASM_OPCODE_I64_CONST; @@ -570,16 +569,18 @@ static void createSyntheticSymbols() { if (config->isPic) { - WasmSym::stackPointer = createUndefinedGlobal( - "__stack_pointer", - config->is64 ? 
&mutableGlobalTypeI64 : &mutableGlobalTypeI32); + WasmSym::stackPointer = + createUndefinedGlobal("__stack_pointer", config->is64.getValueOr(false) + ? &mutableGlobalTypeI64 + : &mutableGlobalTypeI32); // For PIC code, we import two global variables (__memory_base and // __table_base) from the environment and use these as the offset at // which to load our static data and function table. // See: // https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md WasmSym::memoryBase = createUndefinedGlobal( - "__memory_base", config->is64 ? &globalTypeI64 : &globalTypeI32); + "__memory_base", + config->is64.getValueOr(false) ? &globalTypeI64 : &globalTypeI32); WasmSym::tableBase = createUndefinedGlobal("__table_base", &globalTypeI32); WasmSym::memoryBase->markLive(); WasmSym::tableBase->markLive(); @@ -604,9 +605,9 @@ static void createSyntheticSymbols() { WasmSym::tlsAlign = createGlobalVariable("__tls_align", false, 1); WasmSym::initTLS = symtab->addSyntheticFunction( "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, - make(config->is64 ? i64ArgSignature - : i32ArgSignature, - "__wasm_init_tls")); + make( + config->is64.getValueOr(false) ? 
i64ArgSignature : i32ArgSignature, + "__wasm_init_tls")); } } diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index 7f06e61a4b5ae..c7261cf3da2c2 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -72,6 +72,7 @@ void InputChunk::verifyRelocTargets() const { existingValue = decodeULEB128(loc, &bytesRead); break; case R_WASM_TABLE_INDEX_SLEB: + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_TABLE_INDEX_REL_SLEB: case R_WASM_MEMORY_ADDR_SLEB: case R_WASM_MEMORY_ADDR_SLEB64: @@ -86,6 +87,7 @@ void InputChunk::verifyRelocTargets() const { case R_WASM_GLOBAL_INDEX_I32: existingValue = read32le(loc); break; + case R_WASM_TABLE_INDEX_I64: case R_WASM_MEMORY_ADDR_I64: existingValue = read64le(loc); break; @@ -151,6 +153,7 @@ void InputChunk::writeTo(uint8_t *buf) const { case R_WASM_MEMORY_ADDR_REL_SLEB: encodeSLEB128(static_cast(value), loc, 5); break; + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_MEMORY_ADDR_SLEB64: case R_WASM_MEMORY_ADDR_REL_SLEB64: encodeSLEB128(static_cast(value), loc, 10); @@ -162,6 +165,7 @@ void InputChunk::writeTo(uint8_t *buf) const { case R_WASM_GLOBAL_INDEX_I32: write32le(loc, value); break; + case R_WASM_TABLE_INDEX_I64: case R_WASM_MEMORY_ADDR_I64: write64le(loc, value); break; @@ -219,6 +223,7 @@ static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel, case R_WASM_MEMORY_ADDR_LEB64: return encodeULEB128(value, buf); case R_WASM_TABLE_INDEX_SLEB: + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_MEMORY_ADDR_SLEB: case R_WASM_MEMORY_ADDR_SLEB64: return encodeSLEB128(static_cast(value), buf); @@ -237,6 +242,7 @@ static unsigned getRelocWidthPadded(const WasmRelocation &rel) { case R_WASM_TABLE_INDEX_SLEB: case R_WASM_MEMORY_ADDR_SLEB: return 5; + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_MEMORY_ADDR_LEB64: case R_WASM_MEMORY_ADDR_SLEB64: return 10; @@ -335,10 +341,12 @@ void InputSegment::generateRelocationCode(raw_ostream &os) const { LLVM_DEBUG(dbgs() << "generating runtime relocations: " 
<< getName() << " count=" << relocations.size() << "\n"); - unsigned opcode_ptr_const = - config->is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST; - unsigned opcode_ptr_add = - config->is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD; + unsigned opcode_ptr_const = config->is64.getValueOr(false) + ? WASM_OPCODE_I64_CONST + : WASM_OPCODE_I32_CONST; + unsigned opcode_ptr_add = config->is64.getValueOr(false) + ? WASM_OPCODE_I64_ADD + : WASM_OPCODE_I32_ADD; // TODO(sbc): Encode the relocations in the data section and write a loop // here to apply them. @@ -380,7 +388,8 @@ void InputSegment::generateRelocationCode(raw_ostream &os) const { } } else { const GlobalSymbol* baseSymbol = WasmSym::memoryBase; - if (rel.Type == R_WASM_TABLE_INDEX_I32) + if (rel.Type == R_WASM_TABLE_INDEX_I32 || + rel.Type == R_WASM_TABLE_INDEX_I64) baseSymbol = WasmSym::tableBase; writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, baseSymbol->getGlobalIndex(), "base"); diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index 93d390a5457a9..fbe6888355eae 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -126,7 +126,9 @@ uint64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const { uint64_t ObjFile::calcExpectedValue(const WasmRelocation &reloc) const { switch (reloc.Type) { case R_WASM_TABLE_INDEX_I32: - case R_WASM_TABLE_INDEX_SLEB: { + case R_WASM_TABLE_INDEX_I64: + case R_WASM_TABLE_INDEX_SLEB: + case R_WASM_TABLE_INDEX_SLEB64: { const WasmSymbol &sym = wasmObj->syms()[reloc.Index]; return tableEntries[sym.Info.ElementIndex]; } @@ -195,7 +197,9 @@ uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc) const { switch (reloc.Type) { case R_WASM_TABLE_INDEX_I32: + case R_WASM_TABLE_INDEX_I64: case R_WASM_TABLE_INDEX_SLEB: + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_TABLE_INDEX_REL_SLEB: { if (!getFunctionSymbol(reloc.Index)->hasTableIndex()) return 0; @@ -576,10 +580,16 @@ void BitcodeFile::parse() { obj = 
check(lto::InputFile::create(MemoryBufferRef( mb.getBuffer(), saver.save(archiveName + mb.getBufferIdentifier())))); Triple t(obj->getTargetTriple()); - if (t.getArch() != Triple::wasm32) { - error(toString(this) + ": machine type must be wasm32"); + if (!t.isWasm()) { + error(toString(this) + ": machine type must be wasm32 or wasm64"); + return; + } + bool is64 = t.getArch() == Triple::wasm64; + if (config->is64.hasValue() && *config->is64 != is64) { + error(toString(this) + ": machine type for all bitcode files must match"); return; } + config->is64 = is64; std::vector keptComdats; for (StringRef s : obj->getComdatTable()) keptComdats.push_back(symtab->addComdat(s)); diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 6906f86f7150a..2764c88f492cf 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -122,7 +122,9 @@ void MarkLive::mark() { // functions used for weak-undefined symbols have this behaviour (compare // equal to null pointer, only reachable via direct call). 
if (reloc.Type == R_WASM_TABLE_INDEX_SLEB || - reloc.Type == R_WASM_TABLE_INDEX_I32) { + reloc.Type == R_WASM_TABLE_INDEX_SLEB64 || + reloc.Type == R_WASM_TABLE_INDEX_I32 || + reloc.Type == R_WASM_TABLE_INDEX_I64) { auto *funcSym = cast(sym); if (funcSym->hasTableIndex() && funcSym->getTableIndex() == 0) continue; diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 4cc4fff91cd90..2559e0f869cce 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -70,7 +70,9 @@ void scanRelocations(InputChunk *chunk) { switch (reloc.Type) { case R_WASM_TABLE_INDEX_I32: + case R_WASM_TABLE_INDEX_I64: case R_WASM_TABLE_INDEX_SLEB: + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_TABLE_INDEX_REL_SLEB: if (requiresGOTAccess(sym)) break; @@ -86,6 +88,7 @@ void scanRelocations(InputChunk *chunk) { if (config->isPic) { switch (reloc.Type) { case R_WASM_TABLE_INDEX_SLEB: + case R_WASM_TABLE_INDEX_SLEB64: case R_WASM_MEMORY_ADDR_SLEB: case R_WASM_MEMORY_ADDR_LEB: case R_WASM_MEMORY_ADDR_SLEB64: @@ -97,6 +100,7 @@ void scanRelocations(InputChunk *chunk) { "; recompile with -fPIC"); break; case R_WASM_TABLE_INDEX_I32: + case R_WASM_TABLE_INDEX_I64: case R_WASM_MEMORY_ADDR_I32: case R_WASM_MEMORY_ADDR_I64: // These relocation types are only present in the data section and diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 70d6a10200c6e..753482fda4109 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -139,7 +139,7 @@ void ImportSection::writeBody() { } if (config->sharedMemory) import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED; - if (config->is64) + if (config->is64.getValueOr(false)) import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64; writeImport(os, import); } @@ -236,7 +236,7 @@ void MemorySection::writeBody() { flags |= WASM_LIMITS_FLAG_HAS_MAX; if (config->sharedMemory) flags |= WASM_LIMITS_FLAG_IS_SHARED; - if (config->is64) + if (config->is64.getValueOr(false)) flags |= WASM_LIMITS_FLAG_IS_64; 
writeUleb128(os, flags, "memory limits flags"); writeUleb128(os, numMemoryPages, "initial pages"); diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 1401dc50931b3..36b56a408f1dd 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -304,7 +304,8 @@ void Writer::layoutMemory() { if (WasmSym::heapBase) WasmSym::heapBase->setVirtualAddress(memoryPtr); - uint64_t maxMemorySetting = 1ULL << (config->is64 ? 48 : 32); + uint64_t maxMemorySetting = 1ULL + << (config->is64.getValueOr(false) ? 48 : 32); if (config->initialMemory != 0) { if (config->initialMemory != alignTo(config->initialMemory, WasmPageSize)) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index bf748020ea407..b1c0597cf3b3f 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -64,7 +64,7 @@ endif () # some of these generated headers. This approach is copied from Clang's main # CMakeLists.txt, so it should kept in sync the code in Clang which was added # in llvm-svn 308844. -if(LLVM_ENABLE_MODULES AND NOT LLDB_BUILT_STANDALONE) +if(LLVM_ENABLE_MODULES) list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) endif() diff --git a/lldb/examples/darwin/heap_find/heap.py b/lldb/examples/darwin/heap_find/heap.py index a8bc377ffe4cc..8fb2a8c95927e 100644 --- a/lldb/examples/darwin/heap_find/heap.py +++ b/lldb/examples/darwin/heap_find/heap.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # This module is designed to live inside the "lldb" python package diff --git a/lldb/examples/python/armv7_cortex_m_target_defintion.py b/lldb/examples/python/armv7_cortex_m_target_defintion.py index 342de89c462fc..e8f39ccb09e8b 100755 --- a/lldb/examples/python/armv7_cortex_m_target_defintion.py +++ b/lldb/examples/python/armv7_cortex_m_target_defintion.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #===-- armv7_cortex_m_target_definition.py.py ------------------*- C++ -*-===// # # The LLVM Compiler 
Infrastructure diff --git a/lldb/examples/python/bsd.py b/lldb/examples/python/bsd.py index c66226e3710c9..fdf5455fe69e3 100755 --- a/lldb/examples/python/bsd.py +++ b/lldb/examples/python/bsd.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import cmd diff --git a/lldb/examples/python/cmdtemplate.py b/lldb/examples/python/cmdtemplate.py index 97af943e6de2e..aa99e4c03beb8 100644 --- a/lldb/examples/python/cmdtemplate.py +++ b/lldb/examples/python/cmdtemplate.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # --------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 1c1602b0131ea..c9494d708d22f 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. 
diff --git a/lldb/examples/python/delta.py b/lldb/examples/python/delta.py index 1a1f060c5e5e4..0176fb0b3345f 100755 --- a/lldb/examples/python/delta.py +++ b/lldb/examples/python/delta.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # This module will enable GDB remote packet logging when the diff --git a/lldb/examples/python/disasm-stress-test.py b/lldb/examples/python/disasm-stress-test.py index 5d0ce96fbd685..241a73acd4ea9 100755 --- a/lldb/examples/python/disasm-stress-test.py +++ b/lldb/examples/python/disasm-stress-test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import argparse import datetime diff --git a/lldb/examples/python/disasm.py b/lldb/examples/python/disasm.py index 819a0522388e6..20b441835f1a4 100755 --- a/lldb/examples/python/disasm.py +++ b/lldb/examples/python/disasm.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. diff --git a/lldb/examples/python/file_extract.py b/lldb/examples/python/file_extract.py index decbba0b2997c..7278ce54a8a74 100755 --- a/lldb/examples/python/file_extract.py +++ b/lldb/examples/python/file_extract.py @@ -1,4 +1,4 @@ -#! 
/usr/bin/env python +#!/usr/bin/env python import string import struct diff --git a/lldb/examples/python/gdbremote.py b/lldb/examples/python/gdbremote.py index 52601c09d3bee..804977259de77 100755 --- a/lldb/examples/python/gdbremote.py +++ b/lldb/examples/python/gdbremote.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # This module will enable GDB remote packet logging when the diff --git a/lldb/examples/python/globals.py b/lldb/examples/python/globals.py index 3e77344164223..96645afef00fb 100755 --- a/lldb/examples/python/globals.py +++ b/lldb/examples/python/globals.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # For the shells csh, tcsh: diff --git a/lldb/examples/python/lldb_module_utils.py b/lldb/examples/python/lldb_module_utils.py index 2b2fea9d4f15a..c0ac5751ce285 100644 --- a/lldb/examples/python/lldb_module_utils.py +++ b/lldb/examples/python/lldb_module_utils.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import lldb diff --git a/lldb/examples/python/lldbtk.py b/lldb/examples/python/lldbtk.py index 3734b14f95b24..a6a420ba1bd1f 100644 --- a/lldb/examples/python/lldbtk.py +++ b/lldb/examples/python/lldbtk.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import lldb diff --git a/lldb/examples/python/mach_o.py b/lldb/examples/python/mach_o.py index 1780bc3e32094..03ab73b3be9de 100755 --- a/lldb/examples/python/mach_o.py +++ b/lldb/examples/python/mach_o.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import cmd diff --git a/lldb/examples/python/memory.py b/lldb/examples/python/memory.py index 9f8f7e384c42c..26703462c2cde 100755 --- a/lldb/examples/python/memory.py +++ b/lldb/examples/python/memory.py @@ -1,4 +1,4 @@ -#!/usr/bin/python 
+#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. diff --git a/lldb/examples/python/operating_system.py b/lldb/examples/python/operating_system.py index bfa13f0568ea9..f4a5d385320e0 100644 --- a/lldb/examples/python/operating_system.py +++ b/lldb/examples/python/operating_system.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import lldb import struct diff --git a/lldb/examples/python/performance.py b/lldb/examples/python/performance.py index aec6b307f8767..f90857808fc0c 100755 --- a/lldb/examples/python/performance.py +++ b/lldb/examples/python/performance.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. diff --git a/lldb/examples/python/process_events.py b/lldb/examples/python/process_events.py index 6039ebf00200b..3a1391c4476f2 100755 --- a/lldb/examples/python/process_events.py +++ b/lldb/examples/python/process_events.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. 
diff --git a/lldb/examples/python/sbvalue.py b/lldb/examples/python/sbvalue.py index 6e512998da2f3..cc7188a8ea08d 100755 --- a/lldb/examples/python/sbvalue.py +++ b/lldb/examples/python/sbvalue.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import lldb diff --git a/lldb/examples/python/shadow.py b/lldb/examples/python/shadow.py index b14467c52c9a5..73534dce53527 100644 --- a/lldb/examples/python/shadow.py +++ b/lldb/examples/python/shadow.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import lldb diff --git a/lldb/examples/python/sources.py b/lldb/examples/python/sources.py index 9684f7f6e7862..38b3926768b24 100644 --- a/lldb/examples/python/sources.py +++ b/lldb/examples/python/sources.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import lldb diff --git a/lldb/examples/python/stacks.py b/lldb/examples/python/stacks.py index a676b82d097d5..41729ec67443f 100755 --- a/lldb/examples/python/stacks.py +++ b/lldb/examples/python/stacks.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function import lldb import optparse diff --git a/lldb/examples/python/symbolication.py b/lldb/examples/python/symbolication.py index a6daa802cda20..7b29489bd0967 100755 --- a/lldb/examples/python/symbolication.py +++ b/lldb/examples/python/symbolication.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. 
diff --git a/lldb/examples/python/types.py b/lldb/examples/python/types.py index a401e373f4814..513a03b2600ad 100755 --- a/lldb/examples/python/types.py +++ b/lldb/examples/python/types.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. diff --git a/lldb/examples/python/x86_64_linux_target_definition.py b/lldb/examples/python/x86_64_linux_target_definition.py index 13bde540f7e2f..a39b9bed134f8 100644 --- a/lldb/examples/python/x86_64_linux_target_definition.py +++ b/lldb/examples/python/x86_64_linux_target_definition.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #===-- x86_64_linux_target_definition.py -----------------------------*- C++ -*-===// # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/lldb/examples/python/x86_64_qemu_target_definition.py b/lldb/examples/python/x86_64_qemu_target_definition.py index aa081c1611717..f0bed692baacd 100644 --- a/lldb/examples/python/x86_64_qemu_target_definition.py +++ b/lldb/examples/python/x86_64_qemu_target_definition.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #===-- x86_64_qemu_target_definition.py -----------------------------*- C++ -*-===// # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/lldb/examples/python/x86_64_target_definition.py b/lldb/examples/python/x86_64_target_definition.py index 3f7f60dec9b0a..533cafca4007f 100644 --- a/lldb/examples/python/x86_64_target_definition.py +++ b/lldb/examples/python/x86_64_target_definition.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python #===-- x86_64_target_definition.py -----------------------------*- C++ -*-===// # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index 3fae2d0cd04ab..8bd70ab16b5ab 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -958,6 +958,12 @@ class Module : public std::enable_shared_from_this, ///by \a m_file. uint64_t m_object_offset; llvm::sys::TimePoint<> m_object_mod_time; + + /// DataBuffer containing the module image, if it was provided at + /// construction time. Otherwise the data will be retrieved by mapping + /// one of the FileSpec members above. + lldb::DataBufferSP m_data_sp; + lldb::ObjectFileSP m_objfile_sp; ///< A shared pointer to the object file ///parser for this module as it may or may ///not be shared with the SymbolFile diff --git a/lldb/include/lldb/Core/ModuleSpec.h b/lldb/include/lldb/Core/ModuleSpec.h index 01398d443edc9..9dd398a05291f 100644 --- a/lldb/include/lldb/Core/ModuleSpec.h +++ b/lldb/include/lldb/Core/ModuleSpec.h @@ -30,11 +30,19 @@ class ModuleSpec { m_object_name(), m_object_offset(0), m_object_size(0), m_source_mappings() {} - ModuleSpec(const FileSpec &file_spec, const UUID &uuid = UUID()) + /// If the \param data argument is passed, its contents will be used + /// as the module contents instead of trying to read them from + /// \param file_spec. 
+ ModuleSpec(const FileSpec &file_spec, const UUID &uuid = UUID(), + lldb::DataBufferSP data = lldb::DataBufferSP()) : m_file(file_spec), m_platform_file(), m_symbol_file(), m_arch(), - m_uuid(uuid), m_object_name(), m_object_offset(0), - m_object_size(FileSystem::Instance().GetByteSize(file_spec)), - m_source_mappings() {} + m_uuid(uuid), m_object_name(), m_object_offset(0), m_object_size(0), m_source_mappings(), + m_data(data) { + if (data) + m_object_size = data->GetByteSize(); + else if (m_file) + m_object_size = FileSystem::Instance().GetByteSize(file_spec); + } ModuleSpec(const FileSpec &file_spec, const ArchSpec &arch) : m_file(file_spec), m_platform_file(), m_symbol_file(), m_arch(arch), @@ -42,30 +50,6 @@ class ModuleSpec { m_object_size(FileSystem::Instance().GetByteSize(file_spec)), m_source_mappings() {} - ModuleSpec(const ModuleSpec &rhs) - : m_file(rhs.m_file), m_platform_file(rhs.m_platform_file), - m_symbol_file(rhs.m_symbol_file), m_arch(rhs.m_arch), - m_uuid(rhs.m_uuid), m_object_name(rhs.m_object_name), - m_object_offset(rhs.m_object_offset), m_object_size(rhs.m_object_size), - m_object_mod_time(rhs.m_object_mod_time), - m_source_mappings(rhs.m_source_mappings) {} - - ModuleSpec &operator=(const ModuleSpec &rhs) { - if (this != &rhs) { - m_file = rhs.m_file; - m_platform_file = rhs.m_platform_file; - m_symbol_file = rhs.m_symbol_file; - m_arch = rhs.m_arch; - m_uuid = rhs.m_uuid; - m_object_name = rhs.m_object_name; - m_object_offset = rhs.m_object_offset; - m_object_size = rhs.m_object_size; - m_object_mod_time = rhs.m_object_mod_time; - m_source_mappings = rhs.m_source_mappings; - } - return *this; - } - FileSpec *GetFileSpecPtr() { return (m_file ?
&m_file : nullptr); } const FileSpec *GetFileSpecPtr() const { @@ -146,6 +130,8 @@ class ModuleSpec { PathMappingList &GetSourceMappingList() const { return m_source_mappings; } + lldb::DataBufferSP GetData() const { return m_data; } + void Clear() { m_file.Clear(); m_platform_file.Clear(); @@ -289,6 +275,7 @@ class ModuleSpec { uint64_t m_object_size; llvm::sys::TimePoint<> m_object_mod_time; mutable PathMappingList m_source_mappings; + lldb::DataBufferSP m_data = {}; }; class ModuleSpecList { diff --git a/lldb/include/lldb/Host/HostInfoBase.h b/lldb/include/lldb/Host/HostInfoBase.h index 70682c9b685eb..15bb168aad97f 100644 --- a/lldb/include/lldb/Host/HostInfoBase.h +++ b/lldb/include/lldb/Host/HostInfoBase.h @@ -11,6 +11,7 @@ #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/UUID.h" #include "lldb/Utility/UserIDResolver.h" #include "lldb/Utility/XcodeSDK.h" #include "lldb/lldb-enumerations.h" @@ -24,6 +25,11 @@ namespace lldb_private { class FileSpec; +struct SharedCacheImageInfo { + UUID uuid; + lldb::DataBufferSP data_sp; +}; + class HostInfoBase { private: // Static class, unconstructable. @@ -98,6 +104,13 @@ class HostInfoBase { /// Return the directory containing a specific Xcode SDK. static llvm::StringRef GetXcodeSDKPath(XcodeSDK sdk) { return {}; } + /// Return information about module \p image_name if it is loaded in + /// the current process's address space. 
+ static SharedCacheImageInfo + GetSharedCacheImageInfo(llvm::StringRef image_name) { + return {}; + } + protected: static bool ComputeSharedLibraryDirectory(FileSpec &file_spec); static bool ComputeSupportExeDirectory(FileSpec &file_spec); diff --git a/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h b/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h index 3941414f8abdd..ee9f12a90943f 100644 --- a/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h +++ b/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h @@ -37,6 +37,11 @@ class HostInfoMacOSX : public HostInfoPosix { /// Query xcrun to find an Xcode SDK directory. static llvm::StringRef GetXcodeSDKPath(XcodeSDK sdk); + + /// Shared cache utilities + static SharedCacheImageInfo + GetSharedCacheImageInfo(llvm::StringRef image_name); + protected: static bool ComputeSupportExeDirectory(FileSpec &file_spec); static void ComputeHostArchitectureSupport(ArchSpec &arch_32, diff --git a/lldb/include/lldb/Symbol/LineTable.h b/lldb/include/lldb/Symbol/LineTable.h index d66b58ca4c6d7..b48e82f19ffb1 100644 --- a/lldb/include/lldb/Symbol/LineTable.h +++ b/lldb/include/lldb/Symbol/LineTable.h @@ -9,6 +9,7 @@ #ifndef LLDB_SYMBOL_LINETABLE_H #define LLDB_SYMBOL_LINETABLE_H +#include "lldb/Core/Address.h" #include "lldb/Core/ModuleChild.h" #include "lldb/Core/Section.h" #include "lldb/Symbol/LineEntry.h" diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 966a1861f0ddf..e814015c0bf7b 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -172,10 +172,10 @@ class ObjectFile : public std::enable_shared_from_this, lldb::addr_t header_addr, lldb::DataBufferSP &file_data_sp); - static size_t GetModuleSpecifications(const FileSpec &file, - lldb::offset_t file_offset, - lldb::offset_t file_size, - ModuleSpecList &specs); + static size_t + GetModuleSpecifications(const FileSpec &file, lldb::offset_t file_offset, + lldb::offset_t file_size, ModuleSpecList &specs, + 
lldb::DataBufferSP data_sp = lldb::DataBufferSP()); static size_t GetModuleSpecifications(const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp, diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 52696f131f827..6234b8244b3f3 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -372,9 +372,13 @@ class Platform : public PluginInterface { virtual lldb::ProcessSP ConnectProcess(llvm::StringRef connect_url, llvm::StringRef plugin_name, - lldb_private::Debugger &debugger, - lldb_private::Target *target, - lldb_private::Status &error); + Debugger &debugger, Target *target, + Status &error); + + virtual lldb::ProcessSP + ConnectProcessSynchronous(llvm::StringRef connect_url, + llvm::StringRef plugin_name, Debugger &debugger, + Stream &stream, Target *target, Status &error); /// Attach to an existing process using a process ID. /// @@ -848,6 +852,12 @@ class Platform : public PluginInterface { } protected: + /// Private implementation of connecting to a process. If the stream is set + /// we connect synchronously. + lldb::ProcessSP DoConnectProcess(llvm::StringRef connect_url, + llvm::StringRef plugin_name, + Debugger &debugger, Stream *stream, + Target *target, Status &error); bool m_is_host; // Set to true when we are able to actually set the OS version while being // connected. For remote platforms, we might set the version ahead of time diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index a3fb3447169e6..bf9b64547ed50 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -737,7 +737,7 @@ class Process : public std::enable_shared_from_this, /// /// \return /// Returns an error object. 
- virtual Status ConnectRemote(Stream *strm, llvm::StringRef remote_url); + virtual Status ConnectRemote(llvm::StringRef remote_url); bool GetShouldDetach() const { return m_should_detach; } @@ -925,7 +925,7 @@ class Process : public std::enable_shared_from_this, /// /// \return /// Returns an error object. - virtual Status DoConnectRemote(Stream *strm, llvm::StringRef remote_url) { + virtual Status DoConnectRemote(llvm::StringRef remote_url) { Status error; error.SetErrorString("remote connections are not supported"); return error; diff --git a/lldb/include/lldb/Target/StackFrameRecognizer.h b/lldb/include/lldb/Target/StackFrameRecognizer.h index 9c9105ac04e44..38b21e7c9856b 100644 --- a/lldb/include/lldb/Target/StackFrameRecognizer.h +++ b/lldb/include/lldb/Target/StackFrameRecognizer.h @@ -17,6 +17,8 @@ #include "lldb/lldb-private-forward.h" #include "lldb/lldb-public.h" +#include + namespace lldb_private { /// \class RecognizedStackFrame @@ -95,37 +97,45 @@ class ScriptedStackFrameRecognizer : public StackFrameRecognizer { operator=(const ScriptedStackFrameRecognizer &) = delete; }; -/// \class StackFrameRecognizerManager -/// -/// Static class that provides a registry of known stack frame recognizers. -/// Has static methods to add, enumerate, remove, query and invoke recognizers. - +/// Class that provides a registry of known stack frame recognizers. 
class StackFrameRecognizerManager { public: - static void AddRecognizer(lldb::StackFrameRecognizerSP recognizer, - ConstString module, - llvm::ArrayRef symbols, - bool first_instruction_only = true); + void AddRecognizer(lldb::StackFrameRecognizerSP recognizer, + ConstString module, llvm::ArrayRef symbols, + bool first_instruction_only = true); + + void AddRecognizer(lldb::StackFrameRecognizerSP recognizer, + lldb::RegularExpressionSP module, + lldb::RegularExpressionSP symbol, + bool first_instruction_only = true); - static void AddRecognizer(lldb::StackFrameRecognizerSP recognizer, - lldb::RegularExpressionSP module, - lldb::RegularExpressionSP symbol, - bool first_instruction_only = true); + void ForEach(std::function< + void(uint32_t recognizer_id, std::string recognizer_name, + std::string module, llvm::ArrayRef symbols, + bool regexp)> const &callback); - static void - ForEach(std::function symbols, - bool regexp)> const &callback); + bool RemoveRecognizerWithID(uint32_t recognizer_id); - static bool RemoveRecognizerWithID(uint32_t recognizer_id); + void RemoveAllRecognizers(); - static void RemoveAllRecognizers(); + lldb::StackFrameRecognizerSP GetRecognizerForFrame(lldb::StackFrameSP frame); - static lldb::StackFrameRecognizerSP GetRecognizerForFrame( - lldb::StackFrameSP frame); + lldb::RecognizedStackFrameSP RecognizeFrame(lldb::StackFrameSP frame); + +private: + struct RegisteredEntry { + uint32_t recognizer_id; + bool deleted; + lldb::StackFrameRecognizerSP recognizer; + bool is_regexp; + ConstString module; + lldb::RegularExpressionSP module_regexp; + std::vector symbols; + lldb::RegularExpressionSP symbol_regexp; + bool first_instruction_only; + }; - static lldb::RecognizedStackFrameSP RecognizeFrame(lldb::StackFrameSP frame); + std::deque m_recognizers; }; /// \class ValueObjectRecognizerSynthesizedValue diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 280ce6359c72f..c12c68d292b8b 100644 --- 
a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -1251,6 +1251,10 @@ class Target : public std::enable_shared_from_this, void SetREPL(lldb::LanguageType language, lldb::REPLSP repl_sp); + StackFrameRecognizerManager &GetFrameRecognizerManager() { + return *m_frame_recognizer_manager_up; + } + protected: /// Implementing of ModuleList::Notifier. @@ -1325,6 +1329,8 @@ class Target : public std::enable_shared_from_this, bool m_suppress_stop_hooks; bool m_is_dummy_target; unsigned m_next_persistent_variable_index = 0; + /// Stores the frame recognizers of this target. + lldb::StackFrameRecognizerManagerUP m_frame_recognizer_manager_up; static void ImageSearchPathsChanged(const PathMappingList &path_list, void *baton); diff --git a/lldb/include/lldb/Utility/DataBuffer.h b/lldb/include/lldb/Utility/DataBuffer.h index bdc384a3815f9..302b13307958d 100644 --- a/lldb/include/lldb/Utility/DataBuffer.h +++ b/lldb/include/lldb/Utility/DataBuffer.h @@ -79,6 +79,20 @@ class DataBuffer { } }; +class DataBufferUnowned : public DataBuffer { +public: + DataBufferUnowned(uint8_t *bytes, lldb::offset_t size) + : m_bytes(bytes), m_size(size) {} + + uint8_t *GetBytes() override { return m_bytes; } + const uint8_t *GetBytes() const override { return m_bytes; } + lldb::offset_t GetByteSize() const override { return m_size; } + +private: + uint8_t *m_bytes; + lldb::offset_t m_size; +}; + } // namespace lldb_private #endif /// #if defined(__cplusplus) diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 4fd2a07dd6165..478ed1a06443e 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -402,6 +402,8 @@ typedef std::weak_ptr StackFrameWP; typedef std::shared_ptr StackFrameListSP; typedef std::shared_ptr StackFrameRecognizerSP; +typedef std::unique_ptr + StackFrameRecognizerManagerUP; typedef std::shared_ptr StopInfoSP; typedef std::shared_ptr StoppointLocationSP; typedef std::shared_ptr StreamSP; 
diff --git a/lldb/packages/Python/lldbsuite/__init__.py b/lldb/packages/Python/lldbsuite/__init__.py index 195b2683f7b4d..62f6aee71f307 100644 --- a/lldb/packages/Python/lldbsuite/__init__.py +++ b/lldb/packages/Python/lldbsuite/__init__.py @@ -8,14 +8,14 @@ def find_lldb_root(): lldb_root = os.path.dirname(inspect.getfile(inspect.currentframe())) while True: - lldb_root = os.path.dirname(lldb_root) - if lldb_root is None: - return None + parent = os.path.dirname(lldb_root) + if parent == lldb_root: # dirname('/') == '/' + raise Exception("use_lldb_suite_root.py not found") + lldb_root = parent test_path = os.path.join(lldb_root, "use_lldb_suite_root.py") if os.path.isfile(test_path): return lldb_root - return None # lldbsuite.lldb_root refers to the root of the git/svn source checkout lldb_root = find_lldb_root() diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index f05152253c75f..84de0130f9907 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -57,6 +57,9 @@ # Path to the FileCheck testing tool. Not optional. filecheck = None +# Path to the yaml2obj tool. Not optional. +yaml2obj = None + # The arch might dictate some specific CFLAGS to be passed to the toolchain to build # the inferior programs. The global variable cflags_extras provides a hook to do # just that. @@ -119,10 +122,6 @@ clang_module_cache_dir = None # Test results handling globals -results_filename = None -results_formatter_name = None -results_formatter_object = None -results_formatter_options = None test_result = None # Reproducers @@ -163,6 +162,13 @@ def get_filecheck_path(): if filecheck and os.path.lexists(filecheck): return filecheck +def get_yaml2obj_path(): + """ + Get the path to the yaml2obj tool. 
+ """ + if yaml2obj and os.path.lexists(yaml2obj): + return yaml2obj + def is_reproducer_replay(): """ Returns true when dotest is being replayed from a reproducer. Never use diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index ec084184cd656..534bcbf59ac20 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -552,6 +552,16 @@ def are_sb_headers_missing(): return skipTestIfFn(are_sb_headers_missing)(func) +def skipIfRosetta(bugnumber): + """Skip a test when running the testsuite on macOS under the Rosetta translation layer.""" + def is_running_rosetta(self): + if not lldbplatformutil.getPlatform() in ['darwin', 'macosx']: + return "not on macOS" + if (platform.uname()[5] == "arm") and (self.getArchitecture() == "x86_64"): + return "skipped under Rosetta" + return None + return skipTestIfFn(is_running_rosetta) + def skipIfiOSSimulator(func): """Decorate the item to skip tests that should be skipped on the iOS Simulator.""" def is_ios_simulator(): diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index f9975b27c4759..67f227cad7155 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -272,13 +272,17 @@ def parseOptionsAndInitTestdirs(): configuration.dsymutil = seven.get_command_output( 'xcrun -find -toolchain default dsymutil') + + # The lldb-dotest script produced by the CMake build passes in a path to a + # working FileCheck and yaml2obj binary. So does one specific Xcode + # project target. However, when invoking dotest.py directly, a valid + # --filecheck and --yaml2obj option needs to be given. if args.filecheck: - # The lldb-dotest script produced by the CMake build passes in a path - # to a working FileCheck binary. So does one specific Xcode project - # target. 
However, when invoking dotest.py directly, a valid --filecheck - # option needs to be given. configuration.filecheck = os.path.abspath(args.filecheck) + if args.yaml2obj: + configuration.yaml2obj = os.path.abspath(args.yaml2obj) + if not configuration.get_filecheck_path(): logging.warning('No valid FileCheck executable; some tests may fail...') logging.warning('(Double-check the --filecheck argument to dotest.py)') @@ -404,19 +408,6 @@ def parseOptionsAndInitTestdirs(): if do_help: usage(parser) - if args.results_file: - configuration.results_filename = args.results_file - - if args.results_formatter: - configuration.results_formatter_name = args.results_formatter - if args.results_formatter_options: - configuration.results_formatter_options = args.results_formatter_options - - # Default to using the BasicResultsFormatter if no formatter is specified. - if configuration.results_formatter_name is None: - configuration.results_formatter_name = ( - "lldbsuite.test_event.formatter.results_formatter.ResultsFormatter") - # Reproducer arguments if args.capture_path and args.replay_path: logging.error('Cannot specify both a capture and a replay path.') @@ -465,16 +456,10 @@ def parseOptionsAndInitTestdirs(): def setupTestResults(): """Sets up test results-related objects based on arg settings.""" - # Setup the results formatter configuration. - formatter_config = formatter.FormatterConfig() - formatter_config.filename = configuration.results_filename - formatter_config.formatter_name = configuration.results_formatter_name - formatter_config.formatter_options = ( - configuration.results_formatter_options) # Create the results formatter. 
formatter_spec = formatter.create_results_formatter( - formatter_config) + "lldbsuite.test_event.formatter.results_formatter.ResultsFormatter") if formatter_spec is not None and formatter_spec.formatter is not None: configuration.results_formatter_object = formatter_spec.formatter diff --git a/lldb/packages/Python/lldbsuite/test/dotest_args.py b/lldb/packages/Python/lldbsuite/test/dotest_args.py index ff2ac5a47ea50..05dd523e744ac 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest_args.py +++ b/lldb/packages/Python/lldbsuite/test/dotest_args.py @@ -51,7 +51,7 @@ def create_parser(): suggestions: do not lump the "-A arch1 -A arch2" together such that the -E option applies to only one of the architectures''')) group.add_argument('--dsymutil', metavar='dsymutil', dest='dsymutil', help=textwrap.dedent('Specify which dsymutil to use.')) - + group.add_argument('--yaml2obj', metavar='yaml2obj', dest='yaml2obj', help=textwrap.dedent('Specify which yaml2obj binary to use.')) group.add_argument('--filecheck', metavar='filecheck', dest='filecheck', help=textwrap.dedent('Specify which FileCheck binary to use.')) # Test filtering options @@ -244,28 +244,6 @@ def create_parser(): help='(Windows only) When LLDB crashes, display the Windows crash dialog.') group.set_defaults(disable_crash_dialog=True) - # Test results support. 
- group = parser.add_argument_group('Test results options') - group.add_argument( - '--results-file', - action='store', - help=('Specifies the file where test results will be written ' - 'according to the results-formatter class used')) - group.add_argument( - '--results-formatter', - action='store', - help=('Specifies the full package/module/class name used to translate ' - 'test events into some kind of meaningful report, written to ' - 'the designated output results file-like object')) - group.add_argument( - '--results-formatter-option', - '-O', - action='append', - dest='results_formatter_options', - help=('Specify an option to pass to the formatter. ' - 'Use --results-formatter-option="--option1=val1" ' - 'syntax. Note the "=" is critical, don\'t include whitespace.')) - # Re-run related arguments group = parser.add_argument_group('Test Re-run Options') group.add_argument( diff --git a/lldb/packages/Python/lldbsuite/test/lldbinline.py b/lldb/packages/Python/lldbsuite/test/lldbinline.py index 29a708440c2a7..0d1cb24a54dfd 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbinline.py +++ b/lldb/packages/Python/lldbsuite/test/lldbinline.py @@ -82,20 +82,16 @@ def handle_breakpoint(self, test, breakpoint_id): class InlineTest(TestBase): - # Overrides def getBuildDirBasename(self): return self.__class__.__name__ + "." 
+ self.testMethodName - # Internal implementation - def BuildMakefile(self): makefilePath = self.getBuildArtifact("Makefile") if os.path.exists(makefilePath): return categories = {} - for f in os.listdir(self.getSourceDir()): t = source_type(f) if t: @@ -104,24 +100,20 @@ def BuildMakefile(self): else: categories[t] = [f] - makefile = open(makefilePath, 'w+') + with open(makefilePath, 'w+') as makefile: + for t in list(categories.keys()): + line = t + " := " + " ".join(categories[t]) + makefile.write(line + "\n") - for t in list(categories.keys()): - line = t + " := " + " ".join(categories[t]) - makefile.write(line + "\n") + if ('OBJCXX_SOURCES' in list(categories.keys())) or \ + ('OBJC_SOURCES' in list(categories.keys())): + makefile.write( + "LDFLAGS = $(CFLAGS) -lobjc -framework Foundation\n") - if ('OBJCXX_SOURCES' in list(categories.keys())) or ( - 'OBJC_SOURCES' in list(categories.keys())): - makefile.write( - "LDFLAGS = $(CFLAGS) -lobjc -framework Foundation\n") + if ('CXX_SOURCES' in list(categories.keys())): + makefile.write("CXXFLAGS += -std=c++11\n") - if ('CXX_SOURCES' in list(categories.keys())): - makefile.write("CXXFLAGS += -std=c++11\n") - - makefile.write("include Makefile.rules\n") - makefile.write("\ncleanup:\n\trm -f Makefile *.d\n\n") - makefile.flush() - makefile.close() + makefile.write("include Makefile.rules\n") def _test(self): self.BuildMakefile() @@ -168,8 +160,6 @@ def do_test(self): lldb.eStateExited], PROCESS_EXITED) - # Utilities for testcases - def check_expression(self, expression, expected_result, use_summary=True): value = self.frame().EvaluateExpression(expression) self.assertTrue(value.IsValid(), expression + "returned a valid value") diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index ddb79de0ab32f..25805726f9b39 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -576,6 +576,7 @@ def 
enableLogChannelsForCurrentTest(self): # confirm that the file is writeable host_log_path = "{}-host.log".format(log_basename) open(host_log_path, 'w').close() + self.log_files.append(host_log_path) log_enable = "log enable -Tpn -f {} ".format(host_log_path) for channel_with_categories in lldbtest_config.channels: @@ -602,6 +603,7 @@ def enableLogChannelsForCurrentTest(self): if lldb.remote_platform is None: server_log_path = "{}-server.log".format(log_basename) open(server_log_path, 'w').close() + self.log_files.append(server_log_path) os.environ["LLDB_DEBUGSERVER_LOG_FILE"] = server_log_path # Communicate channels to lldb-server @@ -623,12 +625,13 @@ def disableLogChannelsForCurrentTest(self): # Retrieve the server log (if any) from the remote system. It is assumed the server log # is writing to the "server.log" file in the current test directory. This can be # achieved by setting LLDB_DEBUGSERVER_LOG_FILE="server.log" when starting remote - # platform. If the remote logging is not enabled, then just let the Get() command silently - # fail. + # platform. if lldb.remote_platform: - lldb.remote_platform.Get( - lldb.SBFileSpec("server.log"), lldb.SBFileSpec( - self.getLogBasenameForCurrentTest() + "-server.log")) + server_log_path = self.getLogBasenameForCurrentTest() + "-server.log" + if lldb.remote_platform.Get( + lldb.SBFileSpec("server.log"), + lldb.SBFileSpec(server_log_path)).Success(): + self.log_files.append(server_log_path) def setPlatformWorkingDir(self): if not lldb.remote_platform or not configuration.lldb_platform_working_dir: @@ -797,14 +800,12 @@ def setUp(self): # List of spawned subproces.Popen objects self.subprocesses = [] - # List of forked process PIDs - self.forkedProcessPids = [] + # List of log files produced by the current test. + self.log_files = [] - # Create a string buffer to record the session info, to be dumped into a - # test case specific file if test failure is encountered. 
- self.log_basename = self.getLogBasenameForCurrentTest() + session_file = self.getLogBasenameForCurrentTest()+".log" + self.log_files.append(session_file) - session_file = "{}.log".format(self.log_basename) # Python 3 doesn't support unbuffered I/O in text mode. Open buffered. self.session = encoded_file.open(session_file, "utf-8", mode="w") @@ -883,26 +884,15 @@ def setAsync(self, value): self.addTearDownHook(lambda: self.dbg.SetAsync(old_async)) def cleanupSubprocesses(self): - # Ensure any subprocesses are cleaned up - for p in self.subprocesses: + # Terminate subprocesses in reverse order from how they were created. + for p in reversed(self.subprocesses): p.terminate() del p del self.subprocesses[:] - # Ensure any forked processes are cleaned up - for pid in self.forkedProcessPids: - try: - os.kill(pid, signal.SIGTERM) - except OSError: - pass def spawnSubprocess(self, executable, args=[], install_remote=True): """ Creates a subprocess.Popen object with the specified executable and arguments, saves it in self.subprocesses, and returns the object. - NOTE: if using this function, ensure you also call: - - self.addTearDownHook(self.cleanupSubprocesses) - - otherwise the test suite will leak processes. """ proc = _RemoteProcess( install_remote) if lldb.remote_platform else _LocalProcess(self.TraceOn()) @@ -910,28 +900,6 @@ def spawnSubprocess(self, executable, args=[], install_remote=True): self.subprocesses.append(proc) return proc - def forkSubprocess(self, executable, args=[]): - """ Fork a subprocess with its own group ID. - NOTE: if using this function, ensure you also call: - - self.addTearDownHook(self.cleanupSubprocesses) - - otherwise the test suite will leak processes. - """ - child_pid = os.fork() - if child_pid == 0: - # If more I/O support is required, this can be beefed up. 
- fd = os.open(os.devnull, os.O_RDWR) - os.dup2(fd, 1) - os.dup2(fd, 2) - # This call causes the child to have its of group ID - os.setpgid(0, 0) - os.execvp(executable, [executable] + args) - # Give the child time to get through the execvp() call - time.sleep(0.1) - self.forkedProcessPids.append(child_pid) - return child_pid - def HideStdout(self): """Hide output to stdout from the user. @@ -1020,9 +988,6 @@ def deletePexpectChild(self): def tearDown(self): """Fixture for unittest test case teardown.""" - #import traceback - # traceback.print_stack() - self.deletePexpectChild() # Check and run any hook functions. @@ -1049,6 +1014,9 @@ def tearDown(self): for dict in reversed(self.dicts): self.cleanup(dictionary=dict) + # Remove subprocesses created by the test. + self.cleanupSubprocesses() + # This must be the last statement, otherwise teardown hooks or other # lines might depend on this still being active. lldb.SBDebugger.Destroy(self.dbg) @@ -1218,14 +1186,13 @@ def dumpSessionInfo(self): del self.session # process the log files - log_files_for_this_test = glob.glob(self.log_basename + "*") - if prefix != 'Success' or lldbtest_config.log_success: # keep all log files, rename them to include prefix + src_log_basename = self.getLogBasenameForCurrentTest(None) dst_log_basename = self.getLogBasenameForCurrentTest(prefix) - for src in log_files_for_this_test: + for src in self.log_files: if os.path.isfile(src): - dst = src.replace(self.log_basename, dst_log_basename) + dst = src.replace(src_log_basename, dst_log_basename) if os.name == "nt" and os.path.isfile(dst): # On Windows, renaming a -> b will throw an exception if # b exists. On non-Windows platforms it silently @@ -1239,8 +1206,9 @@ def dumpSessionInfo(self): os.rename(src, dst) else: # success! 
(and we don't want log files) delete log files - for log_file in log_files_for_this_test: - remove_file(log_file) + for log_file in self.log_files: + if os.path.isfile(log_file): + remove_file(log_file) # ==================================================== # Config. methods supported through a plugin interface @@ -1633,20 +1601,6 @@ def findBuiltClang(self): return os.environ["CC"] - def findYaml2obj(self): - """ - Get the path to the yaml2obj executable, which can be used to create - test object files from easy to write yaml instructions. - - Throws an Exception if the executable cannot be found. - """ - # Tries to find yaml2obj at the same folder as clang - clang_dir = os.path.dirname(self.findBuiltClang()) - path = distutils.spawn.find_executable("yaml2obj", clang_dir) - if path is not None: - return path - raise Exception("yaml2obj executable not found") - def yaml2obj(self, yaml_path, obj_path): """ @@ -1654,8 +1608,10 @@ def yaml2obj(self, yaml_path, obj_path): Throws subprocess.CalledProcessError if the object could not be created. """ - yaml2obj = self.findYaml2obj() - command = [yaml2obj, "-o=%s" % obj_path, yaml_path] + yaml2obj_bin = configuration.get_yaml2obj_path() + if not yaml2obj_bin: + self.assertTrue(False, "No valid yaml2obj executable specified") + command = [yaml2obj_bin, "-o=%s" % obj_path, yaml_path] system([command]) def getBuildFlags( @@ -1885,9 +1841,6 @@ def generateSource(self, source): self.addTearDownHook(lambda: os.remove(src)) def setUp(self): - #import traceback - # traceback.print_stack() - # Works with the test driver to conditionally skip tests via # decorators. Base.setUp(self) @@ -2006,9 +1959,6 @@ def get_process_working_directory(self): return self.getBuildDir() def tearDown(self): - #import traceback - # traceback.print_stack() - # Ensure all the references to SB objects have gone away so that we can # be sure that all test-specific resources have been freed before we # attempt to delete the targets.
@@ -2479,7 +2429,12 @@ def expect_expr( options.SetLanguage(frame.GuessLanguage()) eval_result = self.frame().EvaluateExpression(expr, options) else: - eval_result = self.target().EvaluateExpression(expr, options) + target = self.target() + # If there is no selected target, run the expression in the dummy + # target. + if not target.IsValid(): + target = self.dbg.GetDummyTarget() + eval_result = target.EvaluateExpression(expr, options) self.assertSuccess(eval_result.GetError()) diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index 5316c51899c7a..b9a6937650d05 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -82,11 +82,9 @@ endif # we strictly required double-quotes #---------------------------------------------------------------------- ifeq "$(HOST_OS)" "Windows_NT" - JOIN_CMD = & QUOTE = " FIXUP_SYNTAX_HIGHLIGHTING_IN_MY_EDITOR = " else - JOIN_CMD = ; QUOTE = ' FIXUP_SYNTAX_HIGHLIGHTING_IN_MY_EDITOR = ' endif @@ -729,28 +727,28 @@ endif # and the -MM option will list all non-system dependencies. 
#---------------------------------------------------------------------- %.d: %.c - @rm -f $@ $(JOIN_CMD) \ - $(CC) -M $(CFLAGS) $< > $@.tmp && \ - sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \ - rm -f $@.tmp + @rm -f $@ + @$(CC) -M $(CFLAGS) $< > $@.tmp && \ + sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ + @rm -f $@.tmp %.d: %.cpp - @rm -f $@ $(JOIN_CMD) \ - $(CXX) -M $(CXXFLAGS) $< > $@.tmp && \ - sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \ - rm -f $@.tmp + @rm -f $@ + @$(CXX) -M $(CXXFLAGS) $< > $@.tmp && \ + sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ + @rm -f $@.tmp %.d: %.m - @rm -f $@ $(JOIN_CMD) \ - $(CC) -M $(CFLAGS) $< > $@.tmp && \ - sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \ - rm -f $@.tmp + @rm -f $@ + @$(CC) -M $(CFLAGS) $< > $@.tmp && \ + sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ + @rm -f $@.tmp %.d: %.mm - @rm -f $@ $(JOIN_CMD) \ - $(CXX) -M $(CXXFLAGS) $< > $@.tmp && \ - sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \ - rm -f $@.tmp + @rm -f $@ + @$(CXX) -M $(CXXFLAGS) $< > $@.tmp && \ + sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ + @rm -f $@.tmp #---------------------------------------------------------------------- # Include all of the makefiles for each source file so we don't have diff --git a/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py b/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py index e54431eb1fe7f..a8114b3e05924 100644 --- a/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py +++ b/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py @@ -254,7 +254,7 @@ def buildGModules( getModuleCacheSpec(), getCmdLine(dictionary)]) - lldbtest.system(commands, sender=sender) + runBuildCommands(commands, sender=sender) # True signifies that we can handle building with gmodules. 
return True diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py index ac611bcca1695..d702d8ee68208 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py @@ -31,10 +31,10 @@ class GdbRemoteTestCaseBase(TestBase): NO_DEBUG_INFO_TESTCASE = True - _TIMEOUT_SECONDS = 120 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) - _DEFAULT_TIMEOUT = 10 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) - _READ_TIMEOUT = 5 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) - _WAIT_TIMEOUT = 5 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) + # Default time out in seconds. The timeout is increased tenfold under Asan. + DEFAULT_TIMEOUT = 10 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) + # Default sleep time in seconds. The sleep time is doubled under Asan. + DEFAULT_SLEEP = 5 * (2 if ('ASAN_OPTIONS' in os.environ) else 1) _GDBREMOTE_KILL_PACKET = "$k#6b" @@ -204,10 +204,10 @@ def shutdown_named_pipe(): return (named_pipe_path, named_pipe, named_pipe_fd) - def get_stub_port_from_named_socket(self, read_timeout_seconds): + def get_stub_port_from_named_socket(self): # Wait for something to read with a max timeout. (ready_readers, _, _) = select.select( - [self.named_pipe_fd], [], [], read_timeout_seconds) + [self.named_pipe_fd], [], [], self.DEFAULT_TIMEOUT) self.assertIsNotNone( ready_readers, "write side of pipe has not written anything - stub isn't writing to pipe.") @@ -402,13 +402,12 @@ def launch_debug_monitor(self, attach_pid=None, logfile=None): self.debug_monitor_exe, commandline_args, install_remote=False) - self.addTearDownHook(self.cleanupSubprocesses) self.assertIsNotNone(server) # If we're receiving the stub's listening port from the named pipe, do # that here. 
if self.named_pipe: - self.port = self.get_stub_port_from_named_socket(self._READ_TIMEOUT) + self.port = self.get_stub_port_from_named_socket() return server @@ -418,15 +417,6 @@ def connect_to_debug_monitor(self, attach_pid=None): server = self.launch_debug_monitor(attach_pid=attach_pid) self.assertIsNotNone(server) - def shutdown_debug_monitor(): - try: - server.terminate() - except: - logger.warning( - "failed to terminate server for debug monitor: {}; ignoring".format( - sys.exc_info()[0])) - self.addTearDownHook(shutdown_debug_monitor) - # Schedule debug monitor to be shut down during teardown. logger = self.logger @@ -445,15 +435,6 @@ def shutdown_debug_monitor(): # Schedule debug monitor to be shut down during teardown. logger = self.logger - def shutdown_debug_monitor(): - try: - server.terminate() - except: - logger.warning( - "failed to terminate server for debug monitor: {}; ignoring".format( - sys.exc_info()[0])) - self.addTearDownHook(shutdown_debug_monitor) - connect_attemps = 0 MAX_CONNECT_ATTEMPTS = 10 @@ -506,17 +487,7 @@ def launch_process_for_attach( if sleep_seconds: args.append("sleep:%d" % sleep_seconds) - inferior = self.spawnSubprocess(exe_path, args) - - def shutdown_process_for_attach(): - try: - inferior.terminate() - except: - logger.warning( - "failed to terminate inferior process for attach: {}; ignoring".format( - sys.exc_info()[0])) - self.addTearDownHook(shutdown_process_for_attach) - return inferior + return self.spawnSubprocess(exe_path, args) def prep_debug_monitor_and_inferior( self, @@ -597,14 +568,14 @@ def prep_debug_monitor_and_inferior( def expect_socket_recv( self, sock, - expected_content_regex, - timeout_seconds): + expected_content_regex + ): response = "" - timeout_time = time.time() + timeout_seconds + timeout_time = time.time() + self.DEFAULT_TIMEOUT while not expected_content_regex.match( response) and time.time() < timeout_time: - can_read, _, _ = select.select([sock], [], [], timeout_seconds) + can_read, _, _ = 
select.select([sock], [], [], self.DEFAULT_TIMEOUT) if can_read and sock in can_read: recv_bytes = sock.recv(4096) if recv_bytes: @@ -612,24 +583,21 @@ def expect_socket_recv( self.assertTrue(expected_content_regex.match(response)) - def expect_socket_send(self, sock, content, timeout_seconds): + def expect_socket_send(self, sock, content): request_bytes_remaining = content - timeout_time = time.time() + timeout_seconds + timeout_time = time.time() + self.DEFAULT_TIMEOUT while len(request_bytes_remaining) > 0 and time.time() < timeout_time: - _, can_write, _ = select.select([], [sock], [], timeout_seconds) + _, can_write, _ = select.select([], [sock], [], self.DEFAULT_TIMEOUT) if can_write and sock in can_write: written_byte_count = sock.send(request_bytes_remaining.encode()) request_bytes_remaining = request_bytes_remaining[ written_byte_count:] self.assertEqual(len(request_bytes_remaining), 0) - def do_handshake(self, stub_socket, timeout_seconds=None): - if not timeout_seconds: - timeout_seconds = self._WAIT_TIMEOUT - + def do_handshake(self, stub_socket): # Write the ack. - self.expect_socket_send(stub_socket, "+", timeout_seconds) + self.expect_socket_send(stub_socket, "+") # Send the start no ack mode packet. NO_ACK_MODE_REQUEST = "$QStartNoAckMode#b0" @@ -638,10 +606,10 @@ def do_handshake(self, stub_socket, timeout_seconds=None): # Receive the ack and "OK" self.expect_socket_recv(stub_socket, re.compile( - r"^\+\$OK#[0-9a-fA-F]{2}$"), timeout_seconds) + r"^\+\$OK#[0-9a-fA-F]{2}$")) # Send the final ack. 
- self.expect_socket_send(stub_socket, "+", timeout_seconds) + self.expect_socket_send(stub_socket, "+") def add_no_ack_remote_stream(self): self.test_sequence.add_log_lines( @@ -729,15 +697,13 @@ def parse_register_info_packets(self, context): return [parse_reg_info_response(reg_info_response) for reg_info_response in reg_info_responses] - def expect_gdbremote_sequence(self, timeout_seconds=None): - if not timeout_seconds: - timeout_seconds = self._TIMEOUT_SECONDS + def expect_gdbremote_sequence(self): return expect_lldb_gdbserver_replay( self, self.sock, self.test_sequence, self._pump_queues, - timeout_seconds, + self.DEFAULT_TIMEOUT, self.logger) _KNOWN_REGINFO_KEYS = [ @@ -885,11 +851,9 @@ def parse_threadinfo_packets(self, context): thread_ids.extend(new_thread_infos) return thread_ids - def wait_for_thread_count(self, thread_count, timeout_seconds=None): - if not timeout_seconds: - timeout_seconds = self._WAIT_TIMEOUT + def wait_for_thread_count(self, thread_count): start_time = time.time() - timeout_time = start_time + timeout_seconds + timeout_time = start_time + self.DEFAULT_TIMEOUT actual_thread_count = 0 while actual_thread_count < thread_count: @@ -907,7 +871,7 @@ def wait_for_thread_count(self, thread_count, timeout_seconds=None): if time.time() > timeout_time: raise Exception( 'timed out after {} seconds while waiting for theads: waiting for at least {} threads, found {}'.format( - timeout_seconds, thread_count, actual_thread_count)) + self.DEFAULT_TIMEOUT, thread_count, actual_thread_count)) return threads diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 676e08d5a38cc..c1b33c220b4bd 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -2,6 +2,7 @@ from lldbsuite.test.lldbtest import * import os import 
vscode +import time class VSCodeTestCaseBase(TestBase): @@ -52,6 +53,12 @@ def set_function_breakpoints(self, functions, condition=None, breakpoint_ids.append('%i' % (breakpoint['id'])) return breakpoint_ids + def waitUntil(self, condition): + while True: + if condition(): + break + time.sleep(0.5) + def verify_breakpoint_hit(self, breakpoint_ids): '''Wait for the process we are debugging to stop, and verify we hit any breakpoint location in the "breakpoint_ids" array. diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 1ad168e794cff..6b1c1c961b545 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -113,6 +113,7 @@ def __init__(self, recv, send, init_commands): self.initialize_body = None self.thread_stop_reasons = {} self.breakpoint_events = [] + self.module_events = {} self.sequence = 1 self.threads = None self.recv_thread.start() @@ -133,6 +134,9 @@ def validate_response(cls, command, response): if command['seq'] != response['request_seq']: raise ValueError('seq mismatch in response') + def get_active_modules(self): + return self.module_events + def get_output(self, category, timeout=0.0, clear=True): self.output_condition.acquire() output = None @@ -218,6 +222,15 @@ def handle_recv_packet(self, packet): self.breakpoint_events.append(packet) # no need to add 'breakpoint' event packets to our packets list return keepGoing + elif event == 'module': + reason = body['reason'] + if (reason == 'new' or reason == 'changed'): + self.module_events[body['module']['name']] = body['module'] + elif reason == 'removed': + if body['module']['name'] in self.module_events: + self.module_events.pop(body['module']['name']) + return keepGoing + elif packet_type == 'response': if packet['command'] == 'disconnect': keepGoing = False @@ -747,6 +760,16 @@ def 
request_setFunctionBreakpoints(self, names, condition=None, } return self.send_recv(command_dict) + def request_getCompileUnits(self, moduleId): + args_dict = {'moduleId': moduleId} + command_dict = { + 'command': 'getCompileUnits', + 'type': 'request', + 'arguments': args_dict + } + response = self.send_recv(command_dict) + return response + def request_completions(self, text): args_dict = { 'text': text, diff --git a/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py b/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py index 1fe6ecd3ef82f..d6609d353c856 100644 --- a/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py +++ b/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py @@ -17,17 +17,6 @@ # LLDB modules -# Ignore method count on DTOs. -# pylint: disable=too-few-public-methods -class FormatterConfig(object): - """Provides formatter configuration info to create_results_formatter().""" - - def __init__(self): - self.filename = None - self.formatter_name = None - self.formatter_options = None - - # Ignore method count on DTOs. # pylint: disable=too-few-public-methods class CreatedFormatter(object): @@ -38,7 +27,7 @@ def __init__(self, formatter, cleanup_func): self.cleanup_func = cleanup_func -def create_results_formatter(config): +def create_results_formatter(formatter_name): """Sets up a test results formatter. @param config an instance of FormatterConfig @@ -47,75 +36,31 @@ def create_results_formatter(config): @return an instance of CreatedFormatter. """ - default_formatter_name = None - results_file_object = None - cleanup_func = None - - if config.filename: - # Open the results file for writing. 
- if config.filename == 'stdout': - results_file_object = sys.stdout - cleanup_func = None - elif config.filename == 'stderr': - results_file_object = sys.stderr - cleanup_func = None - else: - results_file_object = open(config.filename, "w") - cleanup_func = results_file_object.close - default_formatter_name = ( - "lldbsuite.test_event.formatter.xunit.XunitFormatter") - - # If we have a results formatter name specified and we didn't specify - # a results file, we should use stdout. - if config.formatter_name is not None and results_file_object is None: - # Use stdout. - results_file_object = sys.stdout - cleanup_func = None - - if results_file_object: - # We care about the formatter. Choose user-specified or, if - # none specified, use the default for the output type. - if config.formatter_name: - formatter_name = config.formatter_name - else: - formatter_name = default_formatter_name - - # Create an instance of the class. - # First figure out the package/module. - components = formatter_name.split(".") - module = importlib.import_module(".".join(components[:-1])) - - # Create the class name we need to load. - cls = getattr(module, components[-1]) - - # Handle formatter options for the results formatter class. - formatter_arg_parser = cls.arg_parser() - if config.formatter_options and len(config.formatter_options) > 0: - command_line_options = config.formatter_options - else: - command_line_options = [] - - formatter_options = formatter_arg_parser.parse_args( - command_line_options) - - # Create the TestResultsFormatter given the processed options. - results_formatter_object = cls( - results_file_object, - formatter_options) - - def shutdown_formatter(): - """Shuts down the formatter when it is no longer needed.""" - # Tell the formatter to write out anything it may have - # been saving until the very end (e.g. xUnit results - # can't complete its output until this point). 
- results_formatter_object.send_terminate_as_needed() - - # And now close out the output file-like object. - if cleanup_func is not None: - cleanup_func() - - return CreatedFormatter( - results_formatter_object, - shutdown_formatter) - else: - return None + # Create an instance of the class. + # First figure out the package/module. + components = formatter_name.split(".") + module = importlib.import_module(".".join(components[:-1])) + + # Create the class name we need to load. + cls = getattr(module, components[-1]) + + # Handle formatter options for the results formatter class. + formatter_arg_parser = cls.arg_parser() + command_line_options = [] + + formatter_options = formatter_arg_parser.parse_args( + command_line_options) + + # Create the TestResultsFormatter given the processed options. + results_formatter_object = cls(sys.stdout, formatter_options) + + def shutdown_formatter(): + """Shuts down the formatter when it is no longer needed.""" + # Tell the formatter to write out anything it may have + # been saving until the very end (e.g. xUnit results + # can't complete its output until this point). + results_formatter_object.send_terminate_as_needed() + + return CreatedFormatter( + results_formatter_object, + shutdown_formatter) diff --git a/lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py b/lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py deleted file mode 100644 index e480df59a2f28..0000000000000 --- a/lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py +++ /dev/null @@ -1,595 +0,0 @@ -""" -Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -See https://llvm.org/LICENSE.txt for license information. -SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -Provides an xUnit ResultsFormatter for integrating the LLDB -test suite with the Jenkins xUnit aggregator and other xUnit-compliant -test output processors. 
-""" -from __future__ import absolute_import -from __future__ import print_function - -# System modules -import re -import sys -import xml.sax.saxutils - -# Third-party modules -import six - -# Local modules -from ..event_builder import EventBuilder -from ..build_exception import BuildError -from .results_formatter import ResultsFormatter - - -class XunitFormatter(ResultsFormatter): - """Provides xUnit-style formatted output. - """ - - # Result mapping arguments - RM_IGNORE = 'ignore' - RM_SUCCESS = 'success' - RM_FAILURE = 'failure' - RM_PASSTHRU = 'passthru' - - @staticmethod - def _build_illegal_xml_regex(): - """Constructs a regex to match all illegal xml characters. - - Expects to be used against a unicode string.""" - # Construct the range pairs of invalid unicode characters. - illegal_chars_u = [ - (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84), - (0x86, 0x9F), (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF)] - - # For wide builds, we have more. - if sys.maxunicode >= 0x10000: - illegal_chars_u.extend( - [(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), (0x3FFFE, 0x3FFFF), - (0x4FFFE, 0x4FFFF), (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF), - (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), (0x9FFFE, 0x9FFFF), - (0xAFFFE, 0xAFFFF), (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF), - (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), (0xFFFFE, 0xFFFFF), - (0x10FFFE, 0x10FFFF)]) - - # Build up an array of range expressions. - illegal_ranges = [ - "%s-%s" % (six.unichr(low), six.unichr(high)) - for (low, high) in illegal_chars_u] - - # Compile the regex - return re.compile(six.u('[%s]') % six.u('').join(illegal_ranges)) - - @staticmethod - def _quote_attribute(text): - """Returns the given text in a manner safe for usage in an XML attribute. - - @param text the text that should appear within an XML attribute. - @return the attribute-escaped version of the input text. 
- """ - return xml.sax.saxutils.quoteattr(text) - - def _replace_invalid_xml(self, str_or_unicode): - """Replaces invalid XML characters with a '?'. - - @param str_or_unicode a string to replace invalid XML - characters within. Can be unicode or not. If not unicode, - assumes it is a byte string in utf-8 encoding. - - @returns a utf-8-encoded byte string with invalid - XML replaced with '?'. - """ - # Get the content into unicode - if isinstance(str_or_unicode, str): - # If we hit decoding errors due to data corruption, replace the - # invalid characters with U+FFFD REPLACEMENT CHARACTER. - unicode_content = str_or_unicode.decode('utf-8', 'replace') - else: - unicode_content = str_or_unicode - return self.invalid_xml_re.sub( - six.u('?'), unicode_content).encode('utf-8') - - @classmethod - def arg_parser(cls): - """@return arg parser used to parse formatter-specific options.""" - parser = super(XunitFormatter, cls).arg_parser() - - # These are valid choices for results mapping. - results_mapping_choices = [ - XunitFormatter.RM_IGNORE, - XunitFormatter.RM_SUCCESS, - XunitFormatter.RM_FAILURE, - XunitFormatter.RM_PASSTHRU] - parser.add_argument( - "--assert-on-unknown-events", - action="store_true", - help=('cause unknown test events to generate ' - 'a python assert. Default is to ignore.')) - parser.add_argument( - "--ignore-skip-name", - "-n", - metavar='PATTERN', - action="append", - dest='ignore_skip_name_patterns', - help=('a python regex pattern, where ' - 'any skipped test with a test method name where regex ' - 'matches (via search) will be ignored for xUnit test ' - 'result purposes. Can be specified multiple times.')) - parser.add_argument( - "--ignore-skip-reason", - "-r", - metavar='PATTERN', - action="append", - dest='ignore_skip_reason_patterns', - help=('a python regex pattern, where ' - 'any skipped test with a skip reason where the regex ' - 'matches (via search) will be ignored for xUnit test ' - 'result purposes. 
Can be specified multiple times.')) - parser.add_argument( - "--xpass", action="store", choices=results_mapping_choices, - default=XunitFormatter.RM_FAILURE, - help=('specify mapping from unexpected success to jUnit/xUnit ' - 'result type')) - parser.add_argument( - "--xfail", action="store", choices=results_mapping_choices, - default=XunitFormatter.RM_IGNORE, - help=('specify mapping from expected failure to jUnit/xUnit ' - 'result type')) - return parser - - @staticmethod - def _build_regex_list_from_patterns(patterns): - """Builds a list of compiled regular expressions from option value. - - @param patterns contains a list of regular expression - patterns. - - @return list of compiled regular expressions, empty if no - patterns provided. - """ - regex_list = [] - if patterns is not None: - for pattern in patterns: - regex_list.append(re.compile(pattern)) - return regex_list - - def __init__(self, out_file, options): - """Initializes the XunitFormatter instance. - @param out_file file-like object where formatted output is written. - @param options specifies a dictionary of options for the - formatter. 
- """ - # Initialize the parent - super(XunitFormatter, self).__init__(out_file, options) - self.text_encoding = "UTF-8" - self.invalid_xml_re = XunitFormatter._build_illegal_xml_regex() - self.total_test_count = 0 - self.ignore_skip_name_regexes = ( - XunitFormatter._build_regex_list_from_patterns( - options.ignore_skip_name_patterns)) - self.ignore_skip_reason_regexes = ( - XunitFormatter._build_regex_list_from_patterns( - options.ignore_skip_reason_patterns)) - - self.elements = { - "successes": [], - "errors": [], - "failures": [], - "skips": [], - "unexpected_successes": [], - "expected_failures": [], - "all": [] - } - - self.status_handlers = { - EventBuilder.STATUS_SUCCESS: self._handle_success, - EventBuilder.STATUS_FAILURE: self._handle_failure, - EventBuilder.STATUS_ERROR: self._handle_error, - EventBuilder.STATUS_SKIP: self._handle_skip, - EventBuilder.STATUS_EXPECTED_FAILURE: - self._handle_expected_failure, - EventBuilder.STATUS_EXPECTED_TIMEOUT: - self._handle_expected_timeout, - EventBuilder.STATUS_UNEXPECTED_SUCCESS: - self._handle_unexpected_success, - EventBuilder.STATUS_EXCEPTIONAL_EXIT: - self._handle_exceptional_exit, - EventBuilder.STATUS_TIMEOUT: - self._handle_timeout - } - - RESULT_TYPES = { - EventBuilder.TYPE_TEST_RESULT, - EventBuilder.TYPE_JOB_RESULT} - - def handle_event(self, test_event): - super(XunitFormatter, self).handle_event(test_event) - - event_type = test_event["event"] - if event_type is None: - return - - if event_type == "terminate": - # Process all the final result events into their - # XML counterparts. - for result_event in self.result_events.values(): - self._process_test_result(result_event) - self._finish_output() - else: - # This is an unknown event. - if self.options.assert_on_unknown_events: - raise Exception("unknown event type {} from {}\n".format( - event_type, test_event)) - - def _handle_success(self, test_event): - """Handles a test success. - @param test_event the test event to handle. 
- """ - result = self._common_add_testcase_entry(test_event) - with self.lock: - self.elements["successes"].append(result) - - def _handle_failure(self, test_event): - """Handles a test failure. - @param test_event the test event to handle. - """ - message = self._replace_invalid_xml(test_event["issue_message"]) - backtrace = self._replace_invalid_xml( - "".join(test_event.get("issue_backtrace", []))) - - result = self._common_add_testcase_entry( - test_event, - inner_content=( - ''.format( - XunitFormatter._quote_attribute(test_event["issue_class"]), - XunitFormatter._quote_attribute(message), - backtrace) - )) - with self.lock: - self.elements["failures"].append(result) - - def _handle_error_build(self, test_event): - """Handles a test error. - @param test_event the test event to handle. - """ - message = self._replace_invalid_xml(test_event["issue_message"]) - build_issue_description = self._replace_invalid_xml( - BuildError.format_build_error( - test_event.get("build_command", ""), - test_event.get("build_error", ""))) - - result = self._common_add_testcase_entry( - test_event, - inner_content=( - ''.format( - XunitFormatter._quote_attribute(test_event["issue_class"]), - XunitFormatter._quote_attribute(message), - build_issue_description) - )) - with self.lock: - self.elements["errors"].append(result) - - def _handle_error_standard(self, test_event): - """Handles a test error. - @param test_event the test event to handle. 
- """ - message = self._replace_invalid_xml(test_event["issue_message"]) - backtrace = self._replace_invalid_xml( - "".join(test_event.get("issue_backtrace", []))) - - result = self._common_add_testcase_entry( - test_event, - inner_content=( - ''.format( - XunitFormatter._quote_attribute(test_event["issue_class"]), - XunitFormatter._quote_attribute(message), - backtrace) - )) - with self.lock: - self.elements["errors"].append(result) - - def _handle_error(self, test_event): - if test_event.get("issue_phase", None) == "build": - self._handle_error_build(test_event) - else: - self._handle_error_standard(test_event) - - def _handle_exceptional_exit(self, test_event): - """Handles an exceptional exit. - @param test_event the test method or job result event to handle. - """ - if "test_name" in test_event: - name = test_event["test_name"] - else: - name = test_event.get("test_filename", "") - - message_text = "ERROR: {} ({}): {}".format( - test_event.get("exception_code", 0), - test_event.get("exception_description", ""), - name) - message = self._replace_invalid_xml(message_text) - - result = self._common_add_testcase_entry( - test_event, - inner_content=( - ''.format( - "exceptional_exit", - XunitFormatter._quote_attribute(message)) - )) - with self.lock: - self.elements["errors"].append(result) - - def _handle_timeout(self, test_event): - """Handles a test method or job timeout. - @param test_event the test method or job result event to handle. 
- """ - if "test_name" in test_event: - name = test_event["test_name"] - else: - name = test_event.get("test_filename", "") - - message_text = "TIMEOUT: {}".format(name) - message = self._replace_invalid_xml(message_text) - - result = self._common_add_testcase_entry( - test_event, - inner_content=( - ''.format( - XunitFormatter._quote_attribute("timeout"), - XunitFormatter._quote_attribute(message)) - )) - with self.lock: - self.elements["errors"].append(result) - - @staticmethod - def _ignore_based_on_regex_list(test_event, test_key, regex_list): - """Returns whether to ignore a test event based on patterns. - - @param test_event the test event dictionary to check. - @param test_key the key within the dictionary to check. - @param regex_list a list of zero or more regexes. May contain - zero or more compiled regexes. - - @return True if any o the regex list match based on the - re.search() method; false otherwise. - """ - for regex in regex_list: - match = regex.search(test_event.get(test_key, '')) - if match: - return True - return False - - def _handle_skip(self, test_event): - """Handles a skipped test. - @param test_event the test event to handle. - """ - - # Are we ignoring this test based on test name? - if XunitFormatter._ignore_based_on_regex_list( - test_event, 'test_name', self.ignore_skip_name_regexes): - return - - # Are we ignoring this test based on skip reason? - if XunitFormatter._ignore_based_on_regex_list( - test_event, 'skip_reason', self.ignore_skip_reason_regexes): - return - - # We're not ignoring this test. Process the skip. - reason = self._replace_invalid_xml(test_event.get("skip_reason", "")) - result = self._common_add_testcase_entry( - test_event, - inner_content=''.format( - XunitFormatter._quote_attribute(reason))) - with self.lock: - self.elements["skips"].append(result) - - def _handle_expected_failure(self, test_event): - """Handles a test that failed as expected. - @param test_event the test event to handle. 
- """ - if self.options.xfail == XunitFormatter.RM_PASSTHRU: - # This is not a natively-supported junit/xunit - # testcase mode, so it might fail a validating - # test results viewer. - if "bugnumber" in test_event: - bug_id_attribute = 'bug-id={} '.format( - XunitFormatter._quote_attribute(test_event["bugnumber"])) - else: - bug_id_attribute = '' - - result = self._common_add_testcase_entry( - test_event, - inner_content=( - ''.format( - bug_id_attribute, - XunitFormatter._quote_attribute( - test_event["issue_class"]), - XunitFormatter._quote_attribute( - test_event["issue_message"])) - )) - with self.lock: - self.elements["expected_failures"].append(result) - elif self.options.xfail == XunitFormatter.RM_SUCCESS: - result = self._common_add_testcase_entry(test_event) - with self.lock: - self.elements["successes"].append(result) - elif self.options.xfail == XunitFormatter.RM_FAILURE: - result = self._common_add_testcase_entry( - test_event, - inner_content=''.format( - XunitFormatter._quote_attribute(test_event["issue_class"]), - XunitFormatter._quote_attribute( - test_event["issue_message"]))) - with self.lock: - self.elements["failures"].append(result) - elif self.options.xfail == XunitFormatter.RM_IGNORE: - pass - else: - raise Exception( - "unknown xfail option: {}".format(self.options.xfail)) - - @staticmethod - def _handle_expected_timeout(test_event): - """Handles expected_timeout. - @param test_event the test event to handle. - """ - # We don't do anything with expected timeouts, not even report. - pass - - def _handle_unexpected_success(self, test_event): - """Handles a test that passed but was expected to fail. - @param test_event the test event to handle. - """ - if self.options.xpass == XunitFormatter.RM_PASSTHRU: - # This is not a natively-supported junit/xunit - # testcase mode, so it might fail a validating - # test results viewer. 
- result = self._common_add_testcase_entry( - test_event, - inner_content="") - with self.lock: - self.elements["unexpected_successes"].append(result) - elif self.options.xpass == XunitFormatter.RM_SUCCESS: - # Treat the xpass as a success. - result = self._common_add_testcase_entry(test_event) - with self.lock: - self.elements["successes"].append(result) - elif self.options.xpass == XunitFormatter.RM_FAILURE: - # Treat the xpass as a failure. - if "bugnumber" in test_event: - message = "unexpected success (bug_id:{})".format( - test_event["bugnumber"]) - else: - message = "unexpected success (bug_id:none)" - result = self._common_add_testcase_entry( - test_event, - inner_content=''.format( - XunitFormatter._quote_attribute("unexpected_success"), - XunitFormatter._quote_attribute(message))) - with self.lock: - self.elements["failures"].append(result) - elif self.options.xpass == XunitFormatter.RM_IGNORE: - # Ignore the xpass result as far as xUnit reporting goes. - pass - else: - raise Exception("unknown xpass option: {}".format( - self.options.xpass)) - - def _process_test_result(self, test_event): - """Processes the test_event known to be a test result. - - This categorizes the event appropriately and stores the data needed - to generate the final xUnit report. This method skips events that - cannot be represented in xUnit output. - """ - if "status" not in test_event: - raise Exception("test event dictionary missing 'status' key") - - status = test_event["status"] - if status not in self.status_handlers: - raise Exception("test event status '{}' unsupported".format( - status)) - - # Call the status handler for the test result. - self.status_handlers[status](test_event) - - def _common_add_testcase_entry(self, test_event, inner_content=None): - """Registers a testcase result, and returns the text created. - - The caller is expected to manage failure/skip/success counts - in some kind of appropriate way. 
This call simply constructs - the XML and appends the returned result to the self.all_results - list. - - @param test_event the test event dictionary. - - @param inner_content if specified, gets included in the - inner section, at the point before stdout and stderr would be - included. This is where a , , , etc. - could go. - - @return the text of the xml testcase element. - """ - - # Get elapsed time. - test_class = test_event.get("test_class", "") - test_name = test_event.get("test_name", "") - event_time = test_event["event_time"] - time_taken = self.elapsed_time_for_test( - test_class, test_name, event_time) - - # Plumb in stdout/stderr once we shift over to only test results. - test_stdout = '' - test_stderr = '' - - # Formulate the output xml. - if not inner_content: - inner_content = "" - result = ( - '' - '{}{}{}'.format( - test_class, - test_name, - time_taken, - inner_content, - test_stdout, - test_stderr)) - - # Save the result, update total test count. - with self.lock: - self.total_test_count += 1 - self.elements["all"].append(result) - - return result - - def _finish_output_no_lock(self): - """Flushes out the report of test executions to form valid xml output. - - xUnit output is in XML. The reporting system cannot complete the - formatting of the output without knowing when there is no more input. - This call addresses notification of the completed test run and thus is - when we can finish off the report output. - """ - - # Figure out the counts line for the testsuite. If we have - # been counting either unexpected successes or expected - # failures, we'll output those in the counts, at the risk of - # being invalidated by a validating test results viewer. - # These aren't counted by default so they won't show up unless - # the user specified a formatter option to include them. 
- xfail_count = len(self.elements["expected_failures"]) - xpass_count = len(self.elements["unexpected_successes"]) - if xfail_count > 0 or xpass_count > 0: - extra_testsuite_attributes = ( - ' expected-failures="{}"' - ' unexpected-successes="{}"'.format(xfail_count, xpass_count)) - else: - extra_testsuite_attributes = "" - - # Output the header. - self.out_file.write( - '\n' - '' - '\n'.format( - self.text_encoding, - "LLDB test suite", - self.total_test_count, - len(self.elements["errors"]), - len(self.elements["failures"]), - len(self.elements["skips"]), - extra_testsuite_attributes)) - - # Output each of the test result entries. - for result in self.elements["all"]: - self.out_file.write(result + '\n') - - # Close off the test suite. - self.out_file.write('\n') - - def _finish_output(self): - """Finish writing output as all incoming events have arrived.""" - with self.lock: - self._finish_output_no_lock() diff --git a/lldb/scripts/analyze-project-deps.py b/lldb/scripts/analyze-project-deps.py index a120260abfe2c..89da3dc9df7b3 100755 --- a/lldb/scripts/analyze-project-deps.py +++ b/lldb/scripts/analyze-project-deps.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#!/usr/bin/env python import argparse import itertools diff --git a/lldb/scripts/reproducer-replay.py b/lldb/scripts/reproducer-replay.py index 4dd3470583428..40d7cebca05d4 100755 --- a/lldb/scripts/reproducer-replay.py +++ b/lldb/scripts/reproducer-replay.py @@ -1,4 +1,4 @@ -#! 
/usr/bin/env python3 +#!/usr/bin/env python3 from multiprocessing import Pool import multiprocessing diff --git a/lldb/scripts/use_lldb_suite.py b/lldb/scripts/use_lldb_suite.py index a1a2e8b936797..84380f6a5592d 100644 --- a/lldb/scripts/use_lldb_suite.py +++ b/lldb/scripts/use_lldb_suite.py @@ -8,20 +8,18 @@ def find_lldb_root(): while True: parent = os.path.dirname(lldb_root) if parent == lldb_root: # dirname('/') == '/' - break + raise Exception("use_lldb_suite_root.py not found") lldb_root = parent test_path = os.path.join(lldb_root, "use_lldb_suite_root.py") if os.path.isfile(test_path): return lldb_root - return None lldb_root = find_lldb_root() -if lldb_root is not None: - import imp - fp, pathname, desc = imp.find_module("use_lldb_suite_root", [lldb_root]) - try: - imp.load_module("use_lldb_suite_root", fp, pathname, desc) - finally: - if fp: - fp.close() +import imp +fp, pathname, desc = imp.find_module("use_lldb_suite_root", [lldb_root]) +try: + imp.load_module("use_lldb_suite_root", fp, pathname, desc) +finally: + if fp: + fp.close() diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index ca75e91bd9069..b84e9f10fafe9 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -566,7 +566,7 @@ lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url, if (process_sp) { sb_process.SetSP(process_sp); - error.SetError(process_sp->ConnectRemote(nullptr, url)); + error.SetError(process_sp->ConnectRemote(url)); } else { error.SetErrorString("unable to create lldb_private::Process"); } diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 6ebad9b5c488d..b42020d76751e 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -898,12 +898,14 @@ bool CommandObjectFrameRecognizerAdd::DoExecute(Args &command, RegularExpressionSP(new RegularExpression(m_options.m_module)); auto func = 
RegularExpressionSP(new RegularExpression(m_options.m_symbols.front())); - StackFrameRecognizerManager::AddRecognizer(recognizer_sp, module, func); + GetSelectedOrDummyTarget().GetFrameRecognizerManager().AddRecognizer( + recognizer_sp, module, func); } else { auto module = ConstString(m_options.m_module); std::vector symbols(m_options.m_symbols.begin(), m_options.m_symbols.end()); - StackFrameRecognizerManager::AddRecognizer(recognizer_sp, module, symbols); + GetSelectedOrDummyTarget().GetFrameRecognizerManager().AddRecognizer( + recognizer_sp, module, symbols); } #endif @@ -921,7 +923,9 @@ class CommandObjectFrameRecognizerClear : public CommandObjectParsed { protected: bool DoExecute(Args &command, CommandReturnObject &result) override { - StackFrameRecognizerManager::RemoveAllRecognizers(); + GetSelectedOrDummyTarget() + .GetFrameRecognizerManager() + .RemoveAllRecognizers(); result.SetStatus(eReturnStatusSuccessFinishResult); return result.Succeeded(); } @@ -941,7 +945,7 @@ class CommandObjectFrameRecognizerDelete : public CommandObjectParsed { if (request.GetCursorIndex() != 0) return; - StackFrameRecognizerManager::ForEach( + GetSelectedOrDummyTarget().GetFrameRecognizerManager().ForEach( [&request](uint32_t rid, std::string rname, std::string module, llvm::ArrayRef symbols, bool regexp) { @@ -973,7 +977,9 @@ class CommandObjectFrameRecognizerDelete : public CommandObjectParsed { return false; } - StackFrameRecognizerManager::RemoveAllRecognizers(); + GetSelectedOrDummyTarget() + .GetFrameRecognizerManager() + .RemoveAllRecognizers(); result.SetStatus(eReturnStatusSuccessFinishResult); return result.Succeeded(); } @@ -993,7 +999,9 @@ class CommandObjectFrameRecognizerDelete : public CommandObjectParsed { return false; } - StackFrameRecognizerManager::RemoveRecognizerWithID(recognizer_id); + GetSelectedOrDummyTarget() + .GetFrameRecognizerManager() + .RemoveRecognizerWithID(recognizer_id); result.SetStatus(eReturnStatusSuccessFinishResult); return 
result.Succeeded(); } @@ -1011,7 +1019,7 @@ class CommandObjectFrameRecognizerList : public CommandObjectParsed { protected: bool DoExecute(Args &command, CommandReturnObject &result) override { bool any_printed = false; - StackFrameRecognizerManager::ForEach( + GetSelectedOrDummyTarget().GetFrameRecognizerManager().ForEach( [&result, &any_printed]( uint32_t recognizer_id, std::string name, std::string module, llvm::ArrayRef symbols, bool regexp) { @@ -1106,8 +1114,9 @@ class CommandObjectFrameRecognizerInfo : public CommandObjectParsed { return false; } - auto recognizer = - StackFrameRecognizerManager::GetRecognizerForFrame(frame_sp); + auto recognizer = GetSelectedOrDummyTarget() + .GetFrameRecognizerManager() + .GetRecognizerForFrame(frame_sp); Stream &output_stream = result.GetOutputStream(); output_stream.Printf("frame %d ", frame_index); diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 3659f0db832ce..f86779d85b5fa 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -820,9 +820,15 @@ class CommandObjectProcessConnect : public CommandObjectParsed { Status error; Debugger &debugger = GetDebugger(); PlatformSP platform_sp = m_interpreter.GetPlatform(true); - ProcessSP process_sp = platform_sp->ConnectProcess( - command.GetArgumentAtIndex(0), plugin_name, debugger, - debugger.GetSelectedTarget().get(), error); + ProcessSP process_sp = + debugger.GetAsyncExecution() + ? 
platform_sp->ConnectProcess( + command.GetArgumentAtIndex(0), plugin_name, debugger, + debugger.GetSelectedTarget().get(), error) + : platform_sp->ConnectProcessSynchronous( + command.GetArgumentAtIndex(0), plugin_name, debugger, + result.GetOutputStream(), debugger.GetSelectedTarget().get(), + error); if (error.Fail() || process_sp == nullptr) { result.AppendError(error.AsCString("Error connecting to the process")); result.SetStatus(eReturnStatusFailed); diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 7bb71f4d518cc..e50415f930b3b 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -4332,7 +4332,6 @@ class CommandObjectTargetSymbolsAdd : public CommandObjectParsed { module_spec.GetSymbolFileSpec() = symfile_spec; } - ArchSpec arch; bool symfile_exists = FileSystem::Instance().Exists(module_spec.GetSymbolFileSpec()); diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index 233a1b3735508..dbfedfae27a8c 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -128,6 +128,104 @@ static lldb::offset_t DumpAPInt(Stream *s, const DataExtractor &data, return offset; } +/// Dumps decoded instructions to a stream. 
+static lldb::offset_t DumpInstructions(const DataExtractor &DE, Stream *s, + ExecutionContextScope *exe_scope, + offset_t start_offset, + uint64_t base_addr, + size_t number_of_instructions) { + offset_t offset = start_offset; + + TargetSP target_sp; + if (exe_scope) + target_sp = exe_scope->CalculateTarget(); + if (target_sp) { + DisassemblerSP disassembler_sp( + Disassembler::FindPlugin(target_sp->GetArchitecture(), + target_sp->GetDisassemblyFlavor(), nullptr)); + if (disassembler_sp) { + lldb::addr_t addr = base_addr + start_offset; + lldb_private::Address so_addr; + bool data_from_file = true; + if (target_sp->GetSectionLoadList().ResolveLoadAddress(addr, so_addr)) { + data_from_file = false; + } else { + if (target_sp->GetSectionLoadList().IsEmpty() || + !target_sp->GetImages().ResolveFileAddress(addr, so_addr)) + so_addr.SetRawAddress(addr); + } + + size_t bytes_consumed = disassembler_sp->DecodeInstructions( + so_addr, DE, start_offset, number_of_instructions, false, + data_from_file); + + if (bytes_consumed) { + offset += bytes_consumed; + const bool show_address = base_addr != LLDB_INVALID_ADDRESS; + const bool show_bytes = true; + ExecutionContext exe_ctx; + exe_scope->CalculateExecutionContext(exe_ctx); + disassembler_sp->GetInstructionList().Dump(s, show_address, show_bytes, + &exe_ctx); + } + } + } else + s->Printf("invalid target"); + + return offset; +} + +/// Prints the specific escape sequence of the given character to the stream. +/// If the character doesn't have a known specific escape sequence (e.g., '\a', +/// '\n' but not generic escape sequences such as'\x12'), this function will +/// not modify the stream and return false. +static bool TryDumpSpecialEscapedChar(Stream &s, const char c) { + switch (c) { + case '\033': + // Common non-standard escape code for 'escape'. 
+ s.Printf("\\e"); + return true; + case '\a': + s.Printf("\\a"); + return true; + case '\b': + s.Printf("\\b"); + return true; + case '\f': + s.Printf("\\f"); + return true; + case '\n': + s.Printf("\\n"); + return true; + case '\r': + s.Printf("\\r"); + return true; + case '\t': + s.Printf("\\t"); + return true; + case '\v': + s.Printf("\\v"); + return true; + case '\0': + s.Printf("\\0"); + return true; + default: + return false; + } +} + +/// Dump the character to a stream. A character that is not printable will be +/// represented by its escape sequence. +static void DumpCharacter(Stream &s, const char c) { + if (TryDumpSpecialEscapedChar(s, c)) + return; + if (llvm::isPrint(c)) { + s.PutChar(c); + return; + } + s.Printf("\\x%2.2x", c); +} + lldb::offset_t lldb_private::DumpDataExtractor( const DataExtractor &DE, Stream *s, offset_t start_offset, lldb::Format item_format, size_t item_byte_size, size_t item_count, @@ -147,44 +245,9 @@ lldb::offset_t lldb_private::DumpDataExtractor( offset_t offset = start_offset; - if (item_format == eFormatInstruction) { - TargetSP target_sp; - if (exe_scope) - target_sp = exe_scope->CalculateTarget(); - if (target_sp) { - DisassemblerSP disassembler_sp(Disassembler::FindPlugin( - target_sp->GetArchitecture(), - target_sp->GetDisassemblyFlavor(), nullptr)); - if (disassembler_sp) { - lldb::addr_t addr = base_addr + start_offset; - lldb_private::Address so_addr; - bool data_from_file = true; - if (target_sp->GetSectionLoadList().ResolveLoadAddress(addr, so_addr)) { - data_from_file = false; - } else { - if (target_sp->GetSectionLoadList().IsEmpty() || - !target_sp->GetImages().ResolveFileAddress(addr, so_addr)) - so_addr.SetRawAddress(addr); - } - - size_t bytes_consumed = disassembler_sp->DecodeInstructions( - so_addr, DE, start_offset, item_count, false, data_from_file); - - if (bytes_consumed) { - offset += bytes_consumed; - const bool show_address = base_addr != LLDB_INVALID_ADDRESS; - const bool show_bytes = true; - 
ExecutionContext exe_ctx; - exe_scope->CalculateExecutionContext(exe_ctx); - disassembler_sp->GetInstructionList().Dump(s, show_address, - show_bytes, &exe_ctx); - } - } - } else - s->Printf("invalid target"); - - return offset; - } + if (item_format == eFormatInstruction) + return DumpInstructions(DE, s, exe_scope, start_offset, base_addr, + item_count); if ((item_format == eFormatOSType || item_format == eFormatAddressInfo) && item_byte_size > 8) @@ -287,40 +350,11 @@ lldb::offset_t lldb_private::DumpDataExtractor( if (llvm::isPrint(ch)) s->Printf("%c", (char)ch); else if (item_format != eFormatCharPrintable) { - switch (ch) { - case '\033': - s->Printf("\\e"); - break; - case '\a': - s->Printf("\\a"); - break; - case '\b': - s->Printf("\\b"); - break; - case '\f': - s->Printf("\\f"); - break; - case '\n': - s->Printf("\\n"); - break; - case '\r': - s->Printf("\\r"); - break; - case '\t': - s->Printf("\\t"); - break; - case '\v': - s->Printf("\\v"); - break; - case '\0': - s->Printf("\\0"); - break; - default: + if (!TryDumpSpecialEscapedChar(*s, ch)) { if (item_byte_size == 1) s->Printf("\\x%2.2x", (uint8_t)ch); else s->Printf("%" PRIu64, ch); - break; } } else { s->PutChar(NON_PRINTABLE_CHAR); @@ -375,42 +409,7 @@ lldb::offset_t lldb_private::DumpDataExtractor( s->PutChar('\''); for (uint32_t i = 0; i < item_byte_size; ++i) { uint8_t ch = (uint8_t)(uval64 >> ((item_byte_size - i - 1) * 8)); - if (llvm::isPrint(ch)) - s->Printf("%c", ch); - else { - switch (ch) { - case '\033': - s->Printf("\\e"); - break; - case '\a': - s->Printf("\\a"); - break; - case '\b': - s->Printf("\\b"); - break; - case '\f': - s->Printf("\\f"); - break; - case '\n': - s->Printf("\\n"); - break; - case '\r': - s->Printf("\\r"); - break; - case '\t': - s->Printf("\\t"); - break; - case '\v': - s->Printf("\\v"); - break; - case '\0': - s->Printf("\\0"); - break; - default: - s->Printf("\\x%2.2x", ch); - break; - } - } + DumpCharacter(*s, ch); } s->PutChar('\''); } break; @@ -425,40 +424,7 
@@ lldb::offset_t lldb_private::DumpDataExtractor( s->PutChar('\"'); while (const char c = *cstr) { - if (llvm::isPrint(c)) { - s->PutChar(c); - } else { - switch (c) { - case '\033': - s->Printf("\\e"); - break; - case '\a': - s->Printf("\\a"); - break; - case '\b': - s->Printf("\\b"); - break; - case '\f': - s->Printf("\\f"); - break; - case '\n': - s->Printf("\\n"); - break; - case '\r': - s->Printf("\\r"); - break; - case '\t': - s->Printf("\\t"); - break; - case '\v': - s->Printf("\\v"); - break; - default: - s->Printf("\\x%2.2x", c); - break; - } - } - + DumpCharacter(*s, c); ++cstr; } diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index ed69796d88c2a..b76659ee3e074 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -147,11 +147,16 @@ Module::Module(const ModuleSpec &module_spec) : module_spec.GetObjectName().AsCString(""), module_spec.GetObjectName().IsEmpty() ? "" : ")"); + auto data_sp = module_spec.GetData(); + lldb::offset_t file_size = 0; + if (data_sp) + file_size = data_sp->GetByteSize(); + // First extract all module specifications from the file using the local file // path. If there are no specifications, then don't fill anything in ModuleSpecList modules_specs; - if (ObjectFile::GetModuleSpecifications(module_spec.GetFileSpec(), 0, 0, - modules_specs) == 0) + if (ObjectFile::GetModuleSpecifications( + module_spec.GetFileSpec(), 0, file_size, modules_specs, data_sp) == 0) return; // Now make sure that one of the module specifications matches what we just @@ -170,11 +175,20 @@ Module::Module(const ModuleSpec &module_spec) return; } - if (module_spec.GetFileSpec()) - m_mod_time = FileSystem::Instance().GetModificationTime(module_spec.GetFileSpec()); - else if (matching_module_spec.GetFileSpec()) - m_mod_time = - FileSystem::Instance().GetModificationTime(matching_module_spec.GetFileSpec()); + // Set m_data_sp if it was initially provided in the ModuleSpec. 
Note that + // we cannot use the data_sp variable here, because it will have been + // modified by GetModuleSpecifications(). + if (auto module_spec_data_sp = module_spec.GetData()) { + m_data_sp = module_spec_data_sp; + m_mod_time = {}; + } else { + if (module_spec.GetFileSpec()) + m_mod_time = + FileSystem::Instance().GetModificationTime(module_spec.GetFileSpec()); + else if (matching_module_spec.GetFileSpec()) + m_mod_time = FileSystem::Instance().GetModificationTime( + matching_module_spec.GetFileSpec()); + } // Copy the architecture from the actual spec if we got one back, else use // the one that was specified @@ -1110,6 +1124,10 @@ void Module::ReportError(const char *format, ...) { } bool Module::FileHasChanged() const { + // We have provided the DataBuffer for this module to avoid accessing the + // filesystem. We never want to reload those files. + if (m_data_sp) + return false; if (!m_file_has_changed) m_file_has_changed = (FileSystem::Instance().GetModificationTime(m_file) != m_mod_time); @@ -1229,12 +1247,19 @@ ObjectFile *Module::GetObjectFile() { static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); Timer scoped_timer(func_cat, "Module::GetObjectFile () module = %s", GetFileSpec().GetFilename().AsCString("")); - DataBufferSP data_sp; lldb::offset_t data_offset = 0; - const lldb::offset_t file_size = - FileSystem::Instance().GetByteSize(m_file); + lldb::offset_t file_size = 0; + + if (m_data_sp) + file_size = m_data_sp->GetByteSize(); + else if (m_file) + file_size = FileSystem::Instance().GetByteSize(m_file); + if (file_size > m_object_offset) { m_did_load_objfile = true; + // FindPlugin will modify its data_sp argument. Do not let it + // modify our m_data_sp member. 
+ auto data_sp = m_data_sp; m_objfile_sp = ObjectFile::FindPlugin( shared_from_this(), &m_file, m_object_offset, file_size - m_object_offset, data_sp, data_offset); diff --git a/lldb/source/Expression/CMakeLists.txt b/lldb/source/Expression/CMakeLists.txt index 7e2f19ed5b09a..bf94361dd6c19 100644 --- a/lldb/source/Expression/CMakeLists.txt +++ b/lldb/source/Expression/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLDB_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lldb_library(lldbExpression DiagnosticManager.cpp DWARFExpression.cpp @@ -18,7 +14,7 @@ add_lldb_library(lldbExpression UtilityFunction.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen LINK_LIBS lldbCore diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index 60eacb1e49b2c..b325bd2c5b745 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -12,8 +12,10 @@ #include "lldb/Host/HostInfo.h" #include "lldb/Utility/Args.h" #include "lldb/Utility/Log.h" +#include "Utility/UuidCompatibility.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -457,3 +459,64 @@ FileSpec path( auto it_new = g_sdk_path.insert({sdk.GetString(), GetXcodeSDK(sdk)}); return it_new.first->second; } + +namespace { +struct dyld_shared_cache_dylib_text_info { + uint64_t version; // current version 1 + // following fields all exist in version 1 + uint64_t loadAddressUnslid; + uint64_t textSegmentSize; + uuid_t dylibUuid; + const char *path; // pointer invalid at end of iterations + // following fields all exist in version 2 + uint64_t textSegmentOffset; // offset from start of cache +}; +typedef struct dyld_shared_cache_dylib_text_info + dyld_shared_cache_dylib_text_info; +} + +extern "C" int dyld_shared_cache_iterate_text( + const uuid_t cacheUuid, + void (^callback)(const 
dyld_shared_cache_dylib_text_info *info)); +extern "C" uint8_t *_dyld_get_shared_cache_range(size_t *length); +extern "C" bool _dyld_get_shared_cache_uuid(uuid_t uuid); + +namespace { +class SharedCacheInfo { +public: + const UUID &GetUUID() const { return m_uuid; }; + const llvm::StringMap &GetImages() const { + return m_images; + }; + + SharedCacheInfo(); + +private: + llvm::StringMap m_images; + UUID m_uuid; +}; +} + +SharedCacheInfo::SharedCacheInfo() { + size_t shared_cache_size; + uint8_t *shared_cache_start = + _dyld_get_shared_cache_range(&shared_cache_size); + uuid_t dsc_uuid; + _dyld_get_shared_cache_uuid(dsc_uuid); + m_uuid = UUID::fromData(dsc_uuid); + + dyld_shared_cache_iterate_text( + dsc_uuid, ^(const dyld_shared_cache_dylib_text_info *info) { + m_images[info->path] = SharedCacheImageInfo{ + UUID::fromData(info->dylibUuid, 16), + std::make_shared( + shared_cache_start + info->textSegmentOffset, + shared_cache_size - info->textSegmentOffset)}; + }); +} + +SharedCacheImageInfo +HostInfoMacOSX::GetSharedCacheImageInfo(llvm::StringRef image_name) { + static SharedCacheInfo g_shared_cache_info; + return g_shared_cache_info.GetImages().lookup(image_name); +} diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp index 7b0d6f343c030..569d84d39c807 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp @@ -16,6 +16,7 @@ #include "lldb/Core/Section.h" #include "lldb/Expression/DiagnosticManager.h" #include "lldb/Host/FileSystem.h" +#include "lldb/Host/HostInfo.h" #include "lldb/Symbol/Function.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ABI.h" @@ -123,19 +124,39 @@ ModuleSP DynamicLoaderDarwin::FindTargetModuleForImageInfo( module_sp.reset(); } - if (!module_sp) { - if (can_create) { - // We'll call Target::ModulesDidLoad after all the 
modules have been - // added to the target, don't let it be called for every one. - module_sp = target.GetOrCreateModule(module_spec, false /* notify */); - if (!module_sp || module_sp->GetObjectFile() == nullptr) - module_sp = m_process->ReadModuleFromMemory(image_info.file_spec, - image_info.address); - - if (did_create_ptr) - *did_create_ptr = (bool)module_sp; + if (module_sp || !can_create) + return module_sp; + + if (HostInfo::GetArchitecture().IsCompatibleMatch(target.GetArchitecture())) { + // When debugging on the host, we are most likely using the same shared + // cache as our inferior. The dylibs from the shared cache might not + // exist on the filesystem, so let's use the images in our own memory + // to create the modules. + // Check if the requested image is in our shared cache. + SharedCacheImageInfo image_info = + HostInfo::GetSharedCacheImageInfo(module_spec.GetFileSpec().GetPath()); + + // If we found it and it has the correct UUID, let's proceed with + // creating a module from the memory contents. + if (image_info.uuid && + (!module_spec.GetUUID() || module_spec.GetUUID() == image_info.uuid)) { + ModuleSpec shared_cache_spec(module_spec.GetFileSpec(), image_info.uuid, + image_info.data_sp); + module_sp = + target.GetOrCreateModule(shared_cache_spec, false /* notify */); } } + // We'll call Target::ModulesDidLoad after all the modules have been + // added to the target, don't let it be called for every one. + if (!module_sp) + module_sp = target.GetOrCreateModule(module_spec, false /* notify */); + if (!module_sp || module_sp->GetObjectFile() == nullptr) + module_sp = m_process->ReadModuleFromMemory(image_info.file_spec, + image_info.address); + + if (did_create_ptr) + *did_create_ptr = (bool)module_sp; + return module_sp; } @@ -556,7 +577,8 @@ void DynamicLoaderDarwin::UpdateSpecialBinariesFromNewImageInfos( } } - if (exe_idx != UINT32_MAX) { + // Set the target executable if we haven't found one so far. 
+ if (exe_idx != UINT32_MAX && !target.GetExecutableModule()) { const bool can_create = true; ModuleSP exe_module_sp(FindTargetModuleForImageInfo(image_infos[exe_idx], can_create, nullptr)); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt index 69696b9aa76ad..04f6cdf9d9bd5 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt +++ b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLDB_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lldb_library(lldbPluginExpressionParserClang ASTResultSynthesizer.cpp ASTStructExtractor.cpp @@ -29,7 +25,7 @@ add_lldb_library(lldbPluginExpressionParserClang NameSearchContext.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen LINK_LIBS lldbCore diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp index da910f48e59a2..648fc4adf24fd 100644 --- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp +++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp @@ -818,13 +818,14 @@ bool lldb_private::formatters::NSDateSummaryProvider( static const ConstString g___NSDate("__NSDate"); static const ConstString g___NSTaggedDate("__NSTaggedDate"); static const ConstString g_NSCalendarDate("NSCalendarDate"); + static const ConstString g_NSConstantDate("NSConstantDate"); if (class_name.IsEmpty()) return false; uint64_t info_bits = 0, value_bits = 0; if ((class_name == g_NSDate) || (class_name == g___NSDate) || - (class_name == g___NSTaggedDate)) { + (class_name == g___NSTaggedDate) || (class_name == g_NSConstantDate)) { if (descriptor->GetTaggedPointerInfo(&info_bits, &value_bits)) { date_value_bits = ((value_bits << 8) | (info_bits << 4)); memcpy(&date_value, &date_value_bits, sizeof(date_value_bits)); @@ -850,8 +851,14 @@ bool lldb_private::formatters::NSDateSummaryProvider( } else return false; - if (date_value == -63114076800) { - stream.Printf("0001-12-30 00:00:00 
+0000"); + // FIXME: It seems old dates are not formatted according to NSDate's calendar + // so we hardcode distantPast's value so that it looks like LLDB is doing + // the right thing. + + // The relative time in seconds from Cocoa Epoch to [NSDate distantPast]. + const double RelSecondsFromCocoaEpochToNSDateDistantPast = -63114076800; + if (date_value == RelSecondsFromCocoaEpochToNSDateDistantPast) { + stream.Printf("0001-01-01 00:00:00 UTC"); return true; } diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp index e1c789ce26d8d..8d648d8a08614 100644 --- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp @@ -98,42 +98,46 @@ class GenericNSArrayMSyntheticFrontEnd : public NSArrayMSyntheticFrontEndBase { }; namespace Foundation1010 { - struct DataDescriptor_32 { - uint32_t _used; - uint32_t _offset; - uint32_t _size : 28; - uint64_t _priv1 : 4; - uint32_t _priv2; - uint32_t _data; - }; - - struct DataDescriptor_64 { - uint64_t _used; - uint64_t _offset; - uint64_t _size : 60; - uint64_t _priv1 : 4; - uint32_t _priv2; - uint64_t _data; - }; + namespace { + struct DataDescriptor_32 { + uint32_t _used; + uint32_t _offset; + uint32_t _size : 28; + uint64_t _priv1 : 4; + uint32_t _priv2; + uint32_t _data; + }; + + struct DataDescriptor_64 { + uint64_t _used; + uint64_t _offset; + uint64_t _size : 60; + uint64_t _priv1 : 4; + uint32_t _priv2; + uint64_t _data; + }; + } using NSArrayMSyntheticFrontEnd = GenericNSArrayMSyntheticFrontEnd; } namespace Foundation1428 { - struct DataDescriptor_32 { - uint32_t _used; - uint32_t _offset; - uint32_t _size; - uint32_t _data; - }; - - struct DataDescriptor_64 { - uint64_t _used; - uint64_t _offset; - uint64_t _size; - uint64_t _data; - }; + namespace { + struct DataDescriptor_32 { + uint32_t _used; + uint32_t _offset; + uint32_t _size; + uint32_t _data; + }; + + struct DataDescriptor_64 { + uint64_t _used; + uint64_t _offset; + 
uint64_t _size; + uint64_t _data; + }; + } using NSArrayMSyntheticFrontEnd = GenericNSArrayMSyntheticFrontEnd; diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index e4e51de9ddfcd..3dc07678f92f5 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -278,64 +278,67 @@ namespace Foundation1100 { } namespace Foundation1428 { - struct DataDescriptor_32 { - uint32_t _used : 26; - uint32_t _kvo : 1; - uint32_t _size; - uint32_t _buffer; - uint64_t GetSize() { return _size; } - }; - - struct DataDescriptor_64 { - uint64_t _used : 58; - uint32_t _kvo : 1; - uint64_t _size; - uint64_t _buffer; - uint64_t GetSize() { return _size; } - }; - - - + namespace { + struct DataDescriptor_32 { + uint32_t _used : 26; + uint32_t _kvo : 1; + uint32_t _size; + uint32_t _buffer; + uint64_t GetSize() { return _size; } + }; + + struct DataDescriptor_64 { + uint64_t _used : 58; + uint32_t _kvo : 1; + uint64_t _size; + uint64_t _buffer; + uint64_t GetSize() { return _size; } + }; + } + using NSDictionaryMSyntheticFrontEnd = GenericNSDictionaryMSyntheticFrontEnd; } namespace Foundation1437 { - static const uint64_t NSDictionaryCapacities[] = { - 0, 3, 7, 13, 23, 41, 71, 127, 191, 251, 383, 631, 1087, 1723, - 2803, 4523, 7351, 11959, 19447, 31231, 50683, 81919, 132607, - 214519, 346607, 561109, 907759, 1468927, 2376191, 3845119, - 6221311, 10066421, 16287743, 26354171, 42641881, 68996069, - 111638519, 180634607, 292272623, 472907251 - }; - - static const size_t NSDictionaryNumSizeBuckets = sizeof(NSDictionaryCapacities) / sizeof(uint64_t); - - struct DataDescriptor_32 { - uint32_t _buffer; - uint32_t _muts; - uint32_t _used : 25; - uint32_t _kvo : 1; - uint32_t _szidx : 6; + namespace { + static const uint64_t NSDictionaryCapacities[] = { + 0, 3, 7, 13, 23, 41, 71, 127, 191, 251, 383, 631, 1087, 1723, + 2803, 4523, 7351, 11959, 19447, 31231, 50683, 81919, 
132607, + 214519, 346607, 561109, 907759, 1468927, 2376191, 3845119, + 6221311, 10066421, 16287743, 26354171, 42641881, 68996069, + 111638519, 180634607, 292272623, 472907251 + }; + + static const size_t NSDictionaryNumSizeBuckets = + sizeof(NSDictionaryCapacities) / sizeof(uint64_t); + + struct DataDescriptor_32 { + uint32_t _buffer; + uint32_t _muts; + uint32_t _used : 25; + uint32_t _kvo : 1; + uint32_t _szidx : 6; - uint64_t GetSize() { - return (_szidx) >= NSDictionaryNumSizeBuckets ? - 0 : NSDictionaryCapacities[_szidx]; - } - }; - - struct DataDescriptor_64 { - uint64_t _buffer; - uint32_t _muts; - uint32_t _used : 25; - uint32_t _kvo : 1; - uint32_t _szidx : 6; + uint64_t GetSize() { + return (_szidx) >= NSDictionaryNumSizeBuckets ? + 0 : NSDictionaryCapacities[_szidx]; + } + }; + + struct DataDescriptor_64 { + uint64_t _buffer; + uint32_t _muts; + uint32_t _used : 25; + uint32_t _kvo : 1; + uint32_t _szidx : 6; - uint64_t GetSize() { - return (_szidx) >= NSDictionaryNumSizeBuckets ? - 0 : NSDictionaryCapacities[_szidx]; - } - }; + uint64_t GetSize() { + return (_szidx) >= NSDictionaryNumSizeBuckets ? 
+ 0 : NSDictionaryCapacities[_szidx]; + } + }; + } using NSDictionaryMSyntheticFrontEnd = GenericNSDictionaryMSyntheticFrontEnd; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index ac9a093940217..ae77dfeb4ad4b 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -407,7 +407,7 @@ ExtractRuntimeGlobalSymbol(Process *process, ConstString name, } } -static void RegisterObjCExceptionRecognizer(); +static void RegisterObjCExceptionRecognizer(Process *process); AppleObjCRuntimeV2::AppleObjCRuntimeV2(Process *process, const ModuleSP &objc_module_sp) @@ -429,7 +429,7 @@ AppleObjCRuntimeV2::AppleObjCRuntimeV2(Process *process, m_has_object_getClass = (objc_module_sp->FindFirstSymbolWithNameAndType( g_gdb_object_getClass, eSymbolTypeCode) != nullptr); - RegisterObjCExceptionRecognizer(); + RegisterObjCExceptionRecognizer(process); } bool AppleObjCRuntimeV2::GetDynamicTypeAndAddress( @@ -2711,16 +2711,14 @@ class ObjCExceptionThrowFrameRecognizer : public StackFrameRecognizer { }; }; -static void RegisterObjCExceptionRecognizer() { - static llvm::once_flag g_once_flag; - llvm::call_once(g_once_flag, []() { - FileSpec module; - ConstString function; - std::tie(module, function) = AppleObjCRuntime::GetExceptionThrowLocation(); - std::vector symbols = {function}; - StackFrameRecognizerManager::AddRecognizer( - StackFrameRecognizerSP(new ObjCExceptionThrowFrameRecognizer()), - module.GetFilename(), symbols, - /*first_instruction_only*/ true); - }); +static void RegisterObjCExceptionRecognizer(Process *process) { + FileSpec module; + ConstString function; + std::tie(module, function) = AppleObjCRuntime::GetExceptionThrowLocation(); + std::vector symbols = {function}; + + 
process->GetTarget().GetFrameRecognizerManager().AddRecognizer( + StackFrameRecognizerSP(new ObjCExceptionThrowFrameRecognizer()), + module.GetFilename(), symbols, + /*first_instruction_only*/ true); } diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt index c122e09e8febe..9efb2c44d846e 100644 --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt @@ -1,8 +1,3 @@ -if(NOT LLDB_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - - add_lldb_library(lldbPluginRenderScriptRuntime PLUGIN RenderScriptRuntime.cpp RenderScriptExpressionOpts.cpp @@ -10,7 +5,7 @@ add_lldb_library(lldbPluginRenderScriptRuntime PLUGIN RenderScriptScriptGroup.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen LINK_LIBS lldbBreakpoint diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index beee2f5b6df79..bca575b7f8842 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -541,7 +541,8 @@ size_t ObjectFileELF::GetModuleSpecifications( __FUNCTION__, file.GetPath().c_str()); } - data_sp = MapFileData(file, -1, file_offset); + if (data_sp->GetByteSize() < length) + data_sp = MapFileData(file, -1, file_offset); if (data_sp) data.SetData(data_sp); // In case there is header extension in the section #0, the header we @@ -580,8 +581,7 @@ size_t ObjectFileELF::GetModuleSpecifications( func_cat, "Calculating module crc32 %s with size %" PRIu64 " KiB", file.GetLastPathComponent().AsCString(), - (FileSystem::Instance().GetByteSize(file) - file_offset) / - 1024); + (length - file_offset) / 1024); // For core files - which usually don't happen to have a // gnu_debuglink, and are pretty bulky - calculating 
whole diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 2bb4b21adeaec..ab1a6a8bb5f3e 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -47,8 +47,8 @@ #include "ObjectFileMachO.h" -#if defined(__APPLE__) && \ - (defined(__arm__) || defined(__arm64__) || defined(__aarch64__)) +#if defined(__APPLE__) +#include // GetLLDBSharedCacheUUID() needs to call dlsym() #include #endif @@ -1328,6 +1328,19 @@ void ObjectFileMachO::SanitizeSegmentCommand(segment_command_64 &seg_cmd, if (m_length == 0 || seg_cmd.filesize == 0) return; + if ((m_header.flags & MH_DYLIB_IN_CACHE) && !IsInMemory()) { + // In shared cache images, the load commands are relative to the + // shared cache file, and not the the specific image we are + // examining. Let's fix this up so that it looks like a normal + // image. + if (strncmp(seg_cmd.segname, "__TEXT", sizeof(seg_cmd.segname)) == 0) + m_text_address = seg_cmd.vmaddr; + if (strncmp(seg_cmd.segname, "__LINKEDIT", sizeof(seg_cmd.segname)) == 0) + m_linkedit_original_offset = seg_cmd.fileoff; + + seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address; + } + if (seg_cmd.fileoff > m_length) { // We have a load command that says it extends past the end of the file. // This is likely a corrupt file. We don't have any way to return an error @@ -1664,6 +1677,10 @@ void ObjectFileMachO::ProcessSegmentCommand(const load_command &load_cmd_, if (m_data.GetU32(&offset, §64.offset, num_u32s) == nullptr) break; + if ((m_header.flags & MH_DYLIB_IN_CACHE) && !IsInMemory()) { + sect64.offset = sect64.addr - m_text_address; + } + // Keep a list of mach sections around in case we need to get at data that // isn't stored in the abstracted Sections. 
m_mach_sections.push_back(sect64); @@ -2264,14 +2281,17 @@ size_t ObjectFileMachO::ParseSymtab() { Process *process = process_sp.get(); uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete; + bool is_shared_cache_image = m_header.flags & MH_DYLIB_IN_CACHE; + bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory(); + SectionSP linkedit_section_sp( + section_list->FindSectionByName(GetSegmentNameLINKEDIT())); - if (process && m_header.filetype != llvm::MachO::MH_OBJECT) { + if (process && m_header.filetype != llvm::MachO::MH_OBJECT && + !is_local_shared_cache_image) { Target &target = process->GetTarget(); memory_module_load_level = target.GetMemoryModuleLoadLevel(); - SectionSP linkedit_section_sp( - section_list->FindSectionByName(GetSegmentNameLINKEDIT())); // Reading mach file from memory in a process or core file... if (linkedit_section_sp) { @@ -2293,62 +2313,6 @@ size_t ObjectFileMachO::ParseSymtab() { strtab_addr = linkedit_load_addr + symtab_load_command.stroff - linkedit_file_offset; - bool data_was_read = false; - -#if defined(__APPLE__) && \ - (defined(__arm__) || defined(__arm64__) || defined(__aarch64__)) - if (m_header.flags & MH_DYLIB_IN_CACHE && - process->GetAddressByteSize() == sizeof(void *)) { - // This mach-o memory file is in the dyld shared cache. If this - // program is not remote and this is iOS, then this process will - // share the same shared cache as the process we are debugging and we - // can read the entire __LINKEDIT from the address space in this - // process. This is a needed optimization that is used for local iOS - // debugging only since all shared libraries in the shared cache do - // not have corresponding files that exist in the file system of the - // device. They have been combined into a single file. This means we - // always have to load these files from memory. 
All of the symbol and - // string tables from all of the __LINKEDIT sections from the shared - // libraries in the shared cache have been merged into a single large - // symbol and string table. Reading all of this symbol and string - // table data across can slow down debug launch times, so we optimize - // this by reading the memory for the __LINKEDIT section from this - // process. - - UUID lldb_shared_cache; - addr_t lldb_shared_cache_addr; - GetLLDBSharedCacheUUID(lldb_shared_cache_addr, lldb_shared_cache); - UUID process_shared_cache; - addr_t process_shared_cache_addr; - GetProcessSharedCacheUUID(process, process_shared_cache_addr, - process_shared_cache); - bool use_lldb_cache = true; - if (lldb_shared_cache.IsValid() && process_shared_cache.IsValid() && - (lldb_shared_cache != process_shared_cache || - process_shared_cache_addr != lldb_shared_cache_addr)) { - use_lldb_cache = false; - } - - PlatformSP platform_sp(target.GetPlatform()); - if (platform_sp && platform_sp->IsHost() && use_lldb_cache) { - data_was_read = true; - nlist_data.SetData((void *)symoff_addr, nlist_data_byte_size, - eByteOrderLittle); - strtab_data.SetData((void *)strtab_addr, strtab_data_byte_size, - eByteOrderLittle); - if (function_starts_load_command.cmd) { - const addr_t func_start_addr = - linkedit_load_addr + function_starts_load_command.dataoff - - linkedit_file_offset; - function_starts_data.SetData((void *)func_start_addr, - function_starts_load_command.datasize, - eByteOrderLittle); - } - } - } -#endif - - if (!data_was_read) { // Always load dyld - the dynamic linker - from memory if we didn't // find a binary anywhere else. lldb will not register // dylib/framework/bundle loads/unloads if we don't have the dyld @@ -2379,7 +2343,7 @@ size_t ObjectFileMachO::ParseSymtab() { // problem. For binaries outside the shared cache, it's faster to // read the entire strtab at once instead of piece-by-piece as we // process the nlist records. 
- if ((m_header.flags & MH_DYLIB_IN_CACHE) == 0) { + if (!is_shared_cache_image) { DataBufferSP strtab_data_sp( ReadMemory(process_sp, strtab_addr, strtab_data_byte_size)); if (strtab_data_sp) { @@ -2388,7 +2352,6 @@ size_t ObjectFileMachO::ParseSymtab() { } } } - } if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) { if (function_starts_load_command.cmd) { const addr_t func_start_addr = @@ -2405,6 +2368,24 @@ size_t ObjectFileMachO::ParseSymtab() { } } } else { + if (is_local_shared_cache_image) { + // The load commands in shared cache images are relative to the + // beginning of the shared cache, not the library image. The + // data we get handed when creating the ObjectFileMachO starts + // at the beginning of a specific library and spans to the end + // of the cache to be able to reach the shared LINKEDIT + // segments. We need to convert the load command offsets to be + // relative to the beginning of our specific image. + lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset(); + lldb::offset_t linkedit_slide = + linkedit_offset - m_linkedit_original_offset; + symtab_load_command.symoff += linkedit_slide; + symtab_load_command.stroff += linkedit_slide; + dyld_info.export_off += linkedit_slide; + m_dysymtab.indirectsymoff += linkedit_slide; + function_starts_load_command.dataoff += linkedit_slide; + } + nlist_data.SetData(m_data, symtab_load_command.symoff, nlist_data_byte_size); strtab_data.SetData(m_data, symtab_load_command.stroff, @@ -5807,8 +5788,7 @@ void ObjectFileMachO::GetLLDBSharedCacheUUID(addr_t &base_addr, UUID &uuid) { uuid.Clear(); base_addr = LLDB_INVALID_ADDRESS; -#if defined(__APPLE__) && \ - (defined(__arm__) || defined(__arm64__) || defined(__aarch64__)) +#if defined(__APPLE__) uint8_t *(*dyld_get_all_image_infos)(void); dyld_get_all_image_infos = (uint8_t * (*)()) dlsym(RTLD_DEFAULT, "_dyld_get_all_image_infos"); diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h 
b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h index 979e637ef6fd8..0c1d178b19215 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h @@ -225,6 +225,8 @@ class ObjectFileMachO : public lldb_private::ObjectFile { typedef lldb_private::RangeVector FileRangeArray; lldb_private::Address m_entry_point_address; FileRangeArray m_thread_context_offsets; + lldb::offset_t m_linkedit_original_offset; + lldb::addr_t m_text_address; bool m_thread_context_offsets_valid; lldb_private::FileSpecList m_reexported_dylibs; bool m_allow_assembly_emulation_unwind_plans; diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index d2227bde47e92..5feec8167186d 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -169,8 +169,9 @@ size_t ObjectFilePECOFF::GetModuleSpecifications( Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); - if (DataBufferSP full_sp = MapFileData(file, -1, file_offset)) - data_sp = std::move(full_sp); + if (data_sp->GetByteSize() < length) + if (DataBufferSP full_sp = MapFileData(file, -1, file_offset)) + data_sp = std::move(full_sp); auto binary = llvm::object::createBinary(llvm::MemoryBufferRef( toStringRef(data_sp->GetData()), file.GetFilename().GetStringRef())); @@ -539,12 +540,9 @@ DataExtractor ObjectFilePECOFF::ReadImageData(uint32_t offset, size_t size) { if (!size) return {}; - if (m_file) { - // A bit of a hack, but we intend to write to this buffer, so we can't - // mmap it. 
- auto buffer_sp = MapFileData(m_file, size, offset); - return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); - } + if (m_data.ValidOffsetForDataOfSize(offset, size)) + return DataExtractor(m_data, offset, size); + ProcessSP process_sp(m_process_wp.lock()); DataExtractor data; if (process_sp) { @@ -562,13 +560,11 @@ DataExtractor ObjectFilePECOFF::ReadImageData(uint32_t offset, size_t size) { } DataExtractor ObjectFilePECOFF::ReadImageDataByRVA(uint32_t rva, size_t size) { - if (m_file) { - Address addr = GetAddress(rva); - SectionSP sect = addr.GetSection(); - if (!sect) - return {}; - rva = sect->GetFileOffset() + addr.GetOffset(); - } + Address addr = GetAddress(rva); + SectionSP sect = addr.GetSection(); + if (!sect) + return {}; + rva = sect->GetFileOffset() + addr.GetOffset(); return ReadImageData(rva, size); } @@ -650,12 +646,6 @@ Symtab *ObjectFilePECOFF::GetSymtab() { DataExtractor strtab_data = ReadImageData( m_coff_header.symoff + symbol_data_size, strtab_size); - // First 4 bytes should be zeroed after strtab_size has been read, - // because it is used as offset 0 to encode a NULL string. - uint32_t *strtab_data_start = const_cast( - reinterpret_cast(strtab_data.GetDataStart())); - strtab_data_start[0] = 0; - offset = 0; std::string symbol_name; Symbol *symbols = m_symtab_up->Resize(num_syms); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index f5ec08a1a199c..d31559bc90183 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -237,6 +237,30 @@ lldb_private::Status PlatformDarwin::GetSharedModuleWithLocalCache( Status err; + if (IsHost()) { + // When debugging on the host, we are most likely using the same shared + // cache as our inferior. The dylibs from the shared cache might not + // exist on the filesystem, so let's use the images in our own memory + // to create the modules. 
+ + // Check if the requested image is in our shared cache. + SharedCacheImageInfo image_info = + HostInfo::GetSharedCacheImageInfo(module_spec.GetFileSpec().GetPath()); + + // If we found it and it has the correct UUID, let's proceed with + // creating a module from the memory contents. + if (image_info.uuid && + (!module_spec.GetUUID() || module_spec.GetUUID() == image_info.uuid)) { + ModuleSpec shared_cache_spec(module_spec.GetFileSpec(), image_info.uuid, + image_info.data_sp); + err = ModuleList::GetSharedModule(shared_cache_spec, module_sp, + module_search_paths_ptr, + old_module_sp_ptr, did_create_ptr); + if (module_sp) + return err; + } + } + err = ModuleList::GetSharedModule(module_spec, module_sp, module_search_paths_ptr, old_module_sp_ptr, did_create_ptr); diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index 18631a0c53156..21bf7f4ac46d3 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -503,10 +503,10 @@ lldb::ProcessSP PlatformRemoteGDBServer::DebugProcess( "gdb-remote", nullptr); if (process_sp) { - error = process_sp->ConnectRemote(nullptr, connect_url.c_str()); + error = process_sp->ConnectRemote(connect_url.c_str()); // Retry the connect remote one time... 
if (error.Fail()) - error = process_sp->ConnectRemote(nullptr, connect_url.c_str()); + error = process_sp->ConnectRemote(connect_url.c_str()); if (error.Success()) error = process_sp->Launch(launch_info); else if (debugserver_pid != LLDB_INVALID_PROCESS_ID) { @@ -589,7 +589,7 @@ lldb::ProcessSP PlatformRemoteGDBServer::Attach( target->CreateProcess(attach_info.GetListenerForProcess(debugger), "gdb-remote", nullptr); if (process_sp) { - error = process_sp->ConnectRemote(nullptr, connect_url.c_str()); + error = process_sp->ConnectRemote(connect_url.c_str()); if (error.Success()) { ListenerSP listener_sp = attach_info.GetHijackListener(); if (listener_sp) diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp index 5b728a5f2960f..dde25184a8c59 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp @@ -217,7 +217,7 @@ bool ProcessKDP::GetHostArchitecture(ArchSpec &arch) { return false; } -Status ProcessKDP::DoConnectRemote(Stream *strm, llvm::StringRef remote_url) { +Status ProcessKDP::DoConnectRemote(llvm::StringRef remote_url) { Status error; // Don't let any JIT happen when doing KDP as we can't allocate memory and we @@ -873,7 +873,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { OptionGroupUInt64 m_command_byte; OptionGroupString m_packet_data; - virtual Options *GetOptions() { return &m_option_group; } + Options *GetOptions() override { return &m_option_group; } public: CommandObjectProcessKDPPacketSend(CommandInterpreter &interpreter) @@ -900,7 +900,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { ~CommandObjectProcessKDPPacketSend() {} - bool DoExecute(Args &command, CommandReturnObject &result) { + bool DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc == 0) { if 
(!m_command_byte.GetOptionValue().OptionWasSet()) { diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h index 67f8ac0698204..52af56134404c 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h @@ -67,8 +67,7 @@ class ProcessKDP : public lldb_private::Process { WillAttachToProcessWithName(const char *process_name, bool wait_for_launch) override; - lldb_private::Status DoConnectRemote(lldb_private::Stream *strm, - llvm::StringRef remote_url) override; + lldb_private::Status DoConnectRemote(llvm::StringRef remote_url) override; lldb_private::Status DoAttachToProcessWithID( lldb::pid_t pid, diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm.h b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm.h index 616aff8afda7e..35ae0d03e2bbd 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm.h @@ -20,21 +20,21 @@ class RegisterContextKDP_arm : public RegisterContextDarwin_arm { virtual ~RegisterContextKDP_arm(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg); + int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override; - int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int 
DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; - int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg); + int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override; ThreadKDP &m_kdp_thread; }; diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm64.h b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm64.h index 998a78a6b8af7..be387d69c6bcb 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm64.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm64.h @@ -21,21 +21,21 @@ class RegisterContextKDP_arm64 : public RegisterContextDarwin_arm64 { virtual ~RegisterContextKDP_arm64(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg); + int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override; - int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; - int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg); + int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override; ThreadKDP &m_kdp_thread; }; diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_i386.h b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_i386.h index 
f32c88e6cfc5b..9ee6af7cc573d 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_i386.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_i386.h @@ -20,17 +20,17 @@ class RegisterContextKDP_i386 : public RegisterContextDarwin_i386 { virtual ~RegisterContextKDP_i386(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; ThreadKDP &m_kdp_thread; }; diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_x86_64.h b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_x86_64.h index c4aad972ab56d..3d5139d0b613d 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_x86_64.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_x86_64.h @@ -20,17 +20,17 @@ class RegisterContextKDP_x86_64 : public RegisterContextDarwin_x86_64 { virtual ~RegisterContextKDP_x86_64(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int 
DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; ThreadKDP &m_kdp_thread; }; diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ThreadKDP.h b/lldb/source/Plugins/Process/MacOSX-Kernel/ThreadKDP.h index c75540a77302f..7f13fcbeb4a55 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ThreadKDP.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ThreadKDP.h @@ -22,16 +22,16 @@ class ThreadKDP : public lldb_private::Thread { virtual ~ThreadKDP(); - virtual void RefreshStateAfterStop(); + void RefreshStateAfterStop() override; - virtual const char *GetName(); + const char *GetName() override; - virtual const char *GetQueueName(); + const char *GetQueueName() override; - virtual lldb::RegisterContextSP GetRegisterContext(); + lldb::RegisterContextSP GetRegisterContext() override; - virtual lldb::RegisterContextSP - CreateRegisterContextForFrame(lldb_private::StackFrame *frame); + lldb::RegisterContextSP + CreateRegisterContextForFrame(lldb_private::StackFrame *frame) override; void Dump(lldb_private::Log *log, uint32_t index); @@ -41,7 +41,7 @@ class ThreadKDP : public lldb_private::Thread { const char *GetBasicInfoAsString(); - void SetName(const char *name) { + void SetName(const char *name) override { if (name && name[0]) m_thread_name.assign(name); else @@ -66,7 +66,7 @@ class ThreadKDP : public lldb_private::Thread { lldb::addr_t m_thread_dispatch_qaddr; lldb::StopInfoSP m_cached_stop_info_sp; // Protected member functions. 
- virtual bool CalculateStopInfo(); + bool CalculateStopInfo() override; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_MACOSX_KERNEL_THREADKDP_H diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextMach_arm.h b/lldb/source/Plugins/Process/Utility/RegisterContextMach_arm.h index e7c180dbdd27e..1ceca65c97c37 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextMach_arm.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextMach_arm.h @@ -19,21 +19,21 @@ class RegisterContextMach_arm : public RegisterContextDarwin_arm { virtual ~RegisterContextMach_arm(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg); + int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override; - int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; - int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg); + int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTMACH_ARM_H diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextMach_i386.h b/lldb/source/Plugins/Process/Utility/RegisterContextMach_i386.h index 09966be60c921..da5411eb2de24 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextMach_i386.h +++ 
b/lldb/source/Plugins/Process/Utility/RegisterContextMach_i386.h @@ -19,17 +19,17 @@ class RegisterContextMach_i386 : public RegisterContextDarwin_i386 { virtual ~RegisterContextMach_i386(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTMACH_I386_H diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextMach_x86_64.h b/lldb/source/Plugins/Process/Utility/RegisterContextMach_x86_64.h index 2a8a2cca2f8a8..c131c8282bd22 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextMach_x86_64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextMach_x86_64.h @@ -20,17 +20,17 @@ class RegisterContextMach_x86_64 : public RegisterContextDarwin_x86_64 { virtual ~RegisterContextMach_x86_64(); protected: - virtual int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr); + int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override; - int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu); + int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override; - int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc); + int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override; - int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr); + int DoWriteGPR(lldb::tid_t tid, int 
flavor, const GPR &gpr) override; - int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu); + int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override; - int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc); + int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTMACH_X86_64_H diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index ff263fa162587..1fed8e0642670 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -629,8 +629,7 @@ Status ProcessGDBRemote::WillAttachToProcessWithName(const char *process_name, return WillLaunchOrAttach(); } -Status ProcessGDBRemote::DoConnectRemote(Stream *strm, - llvm::StringRef remote_url) { +Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) { Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); Status error(WillLaunchOrAttach()); diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 22d86d6cdd75d..ba967727ae3b8 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -85,7 +85,7 @@ class ProcessGDBRemote : public Process, Status WillAttachToProcessWithName(const char *process_name, bool wait_for_launch) override; - Status DoConnectRemote(Stream *strm, llvm::StringRef remote_url) override; + Status DoConnectRemote(llvm::StringRef remote_url) override; Status WillLaunchOrAttach(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 7de88274ccf6e..2d1db66e7fd91 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1958,9 +1958,9 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, ClangASTImporter::LayoutInfo layout_info; if (die.HasChildren()) { - LanguageType class_language = eLanguageTypeUnknown; - if (TypeSystemClang::IsObjCObjectOrInterfaceType(clang_type)) { - class_language = eLanguageTypeObjC; + const bool type_is_objc_object_or_interface = + TypeSystemClang::IsObjCObjectOrInterfaceType(clang_type); + if (type_is_objc_object_or_interface) { // For objective C we don't start the definition when the class is // created. TypeSystemClang::StartTagDeclarationDefinition(clang_type); @@ -1986,16 +1986,15 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, std::vector member_function_dies; DelayedPropertyList delayed_properties; - ParseChildMembers(die, clang_type, class_language, bases, - member_accessibilities, member_function_dies, - delayed_properties, default_accessibility, is_a_class, - layout_info); + ParseChildMembers(die, clang_type, bases, member_accessibilities, + member_function_dies, delayed_properties, + default_accessibility, is_a_class, layout_info); // Now parse any methods if there were any... for (const DWARFDIE &die : member_function_dies) dwarf->ResolveType(die); - if (class_language == eLanguageTypeObjC) { + if (type_is_objc_object_or_interface) { ConstString class_name(clang_type.GetTypeName()); if (class_name) { dwarf->GetObjCMethods(class_name, [&](DWARFDIE method_die) { @@ -2012,7 +2011,7 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, // If we have a DW_TAG_structure_type instead of a DW_TAG_class_type we // need to tell the clang type it is actually a class. 
- if (class_language != eLanguageTypeObjC) { + if (!type_is_objc_object_or_interface) { if (is_a_class && tag_decl_kind != clang::TTK_Class) m_ast.SetTagTypeKind(ClangUtil::GetQualType(clang_type), clang::TTK_Class); @@ -2346,7 +2345,6 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, void DWARFASTParserClang::ParseSingleMember( const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, - const lldb::LanguageType class_language, std::vector &member_accessibilities, lldb::AccessType default_accessibility, DelayedPropertyList &delayed_properties, @@ -2362,399 +2360,400 @@ void DWARFASTParserClang::ParseSingleMember( DWARFAttributes attributes; const size_t num_attributes = die.GetAttributes(attributes); - if (num_attributes > 0) { - const char *name = nullptr; - const char *prop_name = nullptr; - const char *prop_getter_name = nullptr; - const char *prop_setter_name = nullptr; - uint32_t prop_attributes = 0; - - bool is_artificial = false; - DWARFFormValue encoding_form; - AccessType accessibility = eAccessNone; - uint32_t member_byte_offset = - (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX; - llvm::Optional byte_size; - int64_t bit_offset = 0; - uint64_t data_bit_offset = UINT64_MAX; - size_t bit_size = 0; - bool is_external = - false; // On DW_TAG_members, this means the member is static - uint32_t i; - for (i = 0; i < num_attributes && !is_artificial; ++i) { - const dw_attr_t attr = attributes.AttributeAtIndex(i); - DWARFFormValue form_value; - if (attributes.ExtractFormValueAtIndex(i, form_value)) { - // DW_AT_data_member_location indicates the byte offset of the - // word from the base address of the structure. - // - // DW_AT_bit_offset indicates how many bits into the word - // (according to the host endianness) the low-order bit of the - // field starts. AT_bit_offset can be negative. - // - // DW_AT_bit_size indicates the size of the field in bits. 
- switch (attr) { - case DW_AT_name: - name = form_value.AsCString(); - break; - case DW_AT_type: - encoding_form = form_value; - break; - case DW_AT_bit_offset: - bit_offset = form_value.Signed(); - break; - case DW_AT_bit_size: - bit_size = form_value.Unsigned(); - break; - case DW_AT_byte_size: - byte_size = form_value.Unsigned(); - break; - case DW_AT_data_bit_offset: - data_bit_offset = form_value.Unsigned(); - break; - case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - member_byte_offset = form_value.Unsigned(); + if (num_attributes == 0) + return; + + const char *name = nullptr; + const char *prop_name = nullptr; + const char *prop_getter_name = nullptr; + const char *prop_setter_name = nullptr; + uint32_t prop_attributes = 0; + + bool is_artificial = false; + DWARFFormValue encoding_form; + AccessType accessibility = eAccessNone; + uint32_t member_byte_offset = + (parent_die.Tag() == DW_TAG_union_type) ? 
0 : UINT32_MAX; + llvm::Optional byte_size; + int64_t bit_offset = 0; + uint64_t data_bit_offset = UINT64_MAX; + size_t bit_size = 0; + bool is_external = + false; // On DW_TAG_members, this means the member is static + uint32_t i; + for (i = 0; i < num_attributes && !is_artificial; ++i) { + const dw_attr_t attr = attributes.AttributeAtIndex(i); + DWARFFormValue form_value; + if (attributes.ExtractFormValueAtIndex(i, form_value)) { + // DW_AT_data_member_location indicates the byte offset of the + // word from the base address of the structure. + // + // DW_AT_bit_offset indicates how many bits into the word + // (according to the host endianness) the low-order bit of the + // field starts. AT_bit_offset can be negative. + // + // DW_AT_bit_size indicates the size of the field in bits. + switch (attr) { + case DW_AT_name: + name = form_value.AsCString(); + break; + case DW_AT_type: + encoding_form = form_value; + break; + case DW_AT_bit_offset: + bit_offset = form_value.Signed(); + break; + case DW_AT_bit_size: + bit_size = form_value.Unsigned(); + break; + case DW_AT_byte_size: + byte_size = form_value.Unsigned(); + break; + case DW_AT_data_bit_offset: + data_bit_offset = form_value.Unsigned(); + break; + case DW_AT_data_member_location: + if (form_value.BlockData()) { + Value initialValue(0); + Value memberOffset(0); + const DWARFDataExtractor &debug_info_data = die.GetData(); + uint32_t block_length = form_value.Unsigned(); + uint32_t block_offset = + form_value.BlockData() - debug_info_data.GetDataStart(); + if (DWARFExpression::Evaluate( + nullptr, // ExecutionContext * + nullptr, // RegisterContext * + module_sp, + DataExtractor(debug_info_data, block_offset, block_length), + die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, + memberOffset, nullptr)) { + member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); } - break; + } else { + // With DWARF 3 and later, if the value is an integer constant, + // this form value is the offset in bytes from 
the beginning of + // the containing entity. + member_byte_offset = form_value.Unsigned(); + } + break; - case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); - break; - case DW_AT_artificial: - is_artificial = form_value.Boolean(); - break; - case DW_AT_APPLE_property_name: - prop_name = form_value.AsCString(); - break; - case DW_AT_APPLE_property_getter: - prop_getter_name = form_value.AsCString(); - break; - case DW_AT_APPLE_property_setter: - prop_setter_name = form_value.AsCString(); - break; - case DW_AT_APPLE_property_attribute: - prop_attributes = form_value.Unsigned(); - break; - case DW_AT_external: - is_external = form_value.Boolean(); - break; + case DW_AT_accessibility: + accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); + break; + case DW_AT_artificial: + is_artificial = form_value.Boolean(); + break; + case DW_AT_APPLE_property_name: + prop_name = form_value.AsCString(); + break; + case DW_AT_APPLE_property_getter: + prop_getter_name = form_value.AsCString(); + break; + case DW_AT_APPLE_property_setter: + prop_setter_name = form_value.AsCString(); + break; + case DW_AT_APPLE_property_attribute: + prop_attributes = form_value.Unsigned(); + break; + case DW_AT_external: + is_external = form_value.Boolean(); + break; - default: - case DW_AT_declaration: - case DW_AT_description: - case DW_AT_mutable: - case DW_AT_visibility: - case DW_AT_sibling: - break; - } + default: + case DW_AT_declaration: + case DW_AT_description: + case DW_AT_mutable: + case DW_AT_visibility: + case DW_AT_sibling: + break; } } + } - if (prop_name) { - ConstString fixed_setter; + if (prop_name) { + ConstString fixed_setter; - // Check if the property getter/setter were provided as full names. - // We want basenames, so we extract them. + // Check if the property getter/setter were provided as full names. + // We want basenames, so we extract them. 
- if (prop_getter_name && prop_getter_name[0] == '-') { - ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true); - prop_getter_name = prop_getter_method.GetSelector().GetCString(); - } + if (prop_getter_name && prop_getter_name[0] == '-') { + ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true); + prop_getter_name = prop_getter_method.GetSelector().GetCString(); + } - if (prop_setter_name && prop_setter_name[0] == '-') { - ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true); - prop_setter_name = prop_setter_method.GetSelector().GetCString(); - } + if (prop_setter_name && prop_setter_name[0] == '-') { + ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true); + prop_setter_name = prop_setter_method.GetSelector().GetCString(); + } - // If the names haven't been provided, they need to be filled in. + // If the names haven't been provided, they need to be filled in. - if (!prop_getter_name) { - prop_getter_name = prop_name; - } - if (!prop_setter_name && prop_name[0] && - !(prop_attributes & DW_APPLE_PROPERTY_readonly)) { - StreamString ss; + if (!prop_getter_name) { + prop_getter_name = prop_name; + } + if (!prop_setter_name && prop_name[0] && + !(prop_attributes & DW_APPLE_PROPERTY_readonly)) { + StreamString ss; - ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]); + ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]); - fixed_setter.SetString(ss.GetString()); - prop_setter_name = fixed_setter.GetCString(); - } + fixed_setter.SetString(ss.GetString()); + prop_setter_name = fixed_setter.GetCString(); } + } - // Clang has a DWARF generation bug where sometimes it represents - // fields that are references with bad byte size and bit size/offset - // information such as: - // - // DW_AT_byte_size( 0x00 ) - // DW_AT_bit_size( 0x40 ) - // DW_AT_bit_offset( 0xffffffffffffffc0 ) - // - // So check the bit offset to make sure it is sane, and if the values - // are not sane, remove them. 
If we don't do this then we will end up - // with a crash if we try to use this type in an expression when clang - // becomes unhappy with its recycled debug info. + // Clang has a DWARF generation bug where sometimes it represents + // fields that are references with bad byte size and bit size/offset + // information such as: + // + // DW_AT_byte_size( 0x00 ) + // DW_AT_bit_size( 0x40 ) + // DW_AT_bit_offset( 0xffffffffffffffc0 ) + // + // So check the bit offset to make sure it is sane, and if the values + // are not sane, remove them. If we don't do this then we will end up + // with a crash if we try to use this type in an expression when clang + // becomes unhappy with its recycled debug info. - if (byte_size.getValueOr(0) == 0 && bit_offset < 0) { - bit_size = 0; - bit_offset = 0; - } + if (byte_size.getValueOr(0) == 0 && bit_offset < 0) { + bit_size = 0; + bit_offset = 0; + } - // FIXME: Make Clang ignore Objective-C accessibility for expressions - if (class_language == eLanguageTypeObjC || - class_language == eLanguageTypeObjC_plus_plus) - accessibility = eAccessNone; + const bool class_is_objc_object_or_interface = + TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type); - // Handle static members - if (is_external && member_byte_offset == UINT32_MAX) { - Type *var_type = die.ResolveTypeUID(encoding_form.Reference()); + // FIXME: Make Clang ignore Objective-C accessibility for expressions + if (class_is_objc_object_or_interface) + accessibility = eAccessNone; - if (var_type) { - if (accessibility == eAccessNone) - accessibility = eAccessPublic; - TypeSystemClang::AddVariableToRecordType( - class_clang_type, name, var_type->GetLayoutCompilerType(), - accessibility); - } - return; + // Handle static members + if (is_external && member_byte_offset == UINT32_MAX) { + Type *var_type = die.ResolveTypeUID(encoding_form.Reference()); + + if (var_type) { + if (accessibility == eAccessNone) + accessibility = eAccessPublic; + 
TypeSystemClang::AddVariableToRecordType( + class_clang_type, name, var_type->GetLayoutCompilerType(), + accessibility); } + return; + } - if (!is_artificial) { - Type *member_type = die.ResolveTypeUID(encoding_form.Reference()); + if (!is_artificial) { + Type *member_type = die.ResolveTypeUID(encoding_form.Reference()); - clang::FieldDecl *field_decl = nullptr; - const uint64_t character_width = 8; - const uint64_t word_width = 32; - if (tag == DW_TAG_member) { - if (member_type) { - CompilerType member_clang_type = member_type->GetLayoutCompilerType(); + clang::FieldDecl *field_decl = nullptr; + const uint64_t character_width = 8; + const uint64_t word_width = 32; + if (tag == DW_TAG_member) { + if (member_type) { + CompilerType member_clang_type = member_type->GetLayoutCompilerType(); - if (accessibility == eAccessNone) - accessibility = default_accessibility; - member_accessibilities.push_back(accessibility); + if (accessibility == eAccessNone) + accessibility = default_accessibility; + member_accessibilities.push_back(accessibility); - uint64_t field_bit_offset = - (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8)); + uint64_t field_bit_offset = + (member_byte_offset == UINT32_MAX ? 
0 : (member_byte_offset * 8)); - if (bit_size > 0) { - FieldInfo this_field_info; - this_field_info.bit_offset = field_bit_offset; - this_field_info.bit_size = bit_size; + if (bit_size > 0) { + FieldInfo this_field_info; + this_field_info.bit_offset = field_bit_offset; + this_field_info.bit_size = bit_size; - if (data_bit_offset != UINT64_MAX) { - this_field_info.bit_offset = data_bit_offset; - } else { - if (!byte_size) - byte_size = member_type->GetByteSize(); + if (data_bit_offset != UINT64_MAX) { + this_field_info.bit_offset = data_bit_offset; + } else { + if (!byte_size) + byte_size = member_type->GetByteSize(); - ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); - if (objfile->GetByteOrder() == eByteOrderLittle) { - this_field_info.bit_offset += byte_size.getValueOr(0) * 8; - this_field_info.bit_offset -= (bit_offset + bit_size); - } else { - this_field_info.bit_offset += bit_offset; - } + ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); + if (objfile->GetByteOrder() == eByteOrderLittle) { + this_field_info.bit_offset += byte_size.getValueOr(0) * 8; + this_field_info.bit_offset -= (bit_offset + bit_size); + } else { + this_field_info.bit_offset += bit_offset; } + } - if ((this_field_info.bit_offset >= parent_bit_size) || - (last_field_info.IsBitfield() && - !last_field_info.NextBitfieldOffsetIsValid( - this_field_info.bit_offset))) { - ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); - objfile->GetModule()->ReportWarning( - "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid " - "bit offset (0x%8.8" PRIx64 - ") member will be ignored. 
Please file a bug against the " - "compiler and include the preprocessed output for %s\n", - die.GetID(), DW_TAG_value_to_name(tag), name, - this_field_info.bit_offset, GetUnitName(parent_die).c_str()); - return; - } + if ((this_field_info.bit_offset >= parent_bit_size) || + (last_field_info.IsBitfield() && + !last_field_info.NextBitfieldOffsetIsValid( + this_field_info.bit_offset))) { + ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); + objfile->GetModule()->ReportWarning( + "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid " + "bit offset (0x%8.8" PRIx64 + ") member will be ignored. Please file a bug against the " + "compiler and include the preprocessed output for %s\n", + die.GetID(), DW_TAG_value_to_name(tag), name, + this_field_info.bit_offset, GetUnitName(parent_die).c_str()); + return; + } - // Update the field bit offset we will report for layout - field_bit_offset = this_field_info.bit_offset; - - // Objective-C has invalid DW_AT_bit_offset values in older - // versions of clang, so we have to be careful and only insert - // unnamed bitfields if we have a new enough clang. - bool detect_unnamed_bitfields = true; - - if (class_language == eLanguageTypeObjC || - class_language == eLanguageTypeObjC_plus_plus) - detect_unnamed_bitfields = - die.GetCU()->Supports_unnamed_objc_bitfields(); - - if (detect_unnamed_bitfields) { - clang::Optional unnamed_field_info; - uint64_t last_field_end = 0; - - last_field_end = - last_field_info.bit_offset + last_field_info.bit_size; - - if (!last_field_info.IsBitfield()) { - // The last field was not a bit-field... - // but if it did take up the entire word then we need to extend - // last_field_end so the bit-field does not step into the last - // fields padding. 
- if (last_field_end != 0 && ((last_field_end % word_width) != 0)) - last_field_end += word_width - (last_field_end % word_width); - } + // Update the field bit offset we will report for layout + field_bit_offset = this_field_info.bit_offset; - // If we have a gap between the last_field_end and the current - // field we have an unnamed bit-field. - // If we have a base class, we assume there is no unnamed - // bit-field if this is the first field since the gap can be - // attributed to the members from the base class. This assumption - // is not correct if the first field of the derived class is - // indeed an unnamed bit-field. We currently do not have the - // machinary to track the offset of the last field of classes we - // have seen before, so we are not handling this case. - if (this_field_info.bit_offset != last_field_end && - this_field_info.bit_offset > last_field_end && - !(last_field_info.bit_offset == 0 && - last_field_info.bit_size == 0 && - layout_info.base_offsets.size() != 0)) { - unnamed_field_info = FieldInfo{}; - unnamed_field_info->bit_size = - this_field_info.bit_offset - last_field_end; - unnamed_field_info->bit_offset = last_field_end; - } + // Objective-C has invalid DW_AT_bit_offset values in older + // versions of clang, so we have to be careful and only insert + // unnamed bitfields if we have a new enough clang. 
+ bool detect_unnamed_bitfields = true; - if (unnamed_field_info) { - clang::FieldDecl *unnamed_bitfield_decl = - TypeSystemClang::AddFieldToRecordType( - class_clang_type, llvm::StringRef(), - m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint, - word_width), - accessibility, unnamed_field_info->bit_size); + if (class_is_objc_object_or_interface) + detect_unnamed_bitfields = + die.GetCU()->Supports_unnamed_objc_bitfields(); - layout_info.field_offsets.insert(std::make_pair( - unnamed_bitfield_decl, unnamed_field_info->bit_offset)); - } + if (detect_unnamed_bitfields) { + clang::Optional unnamed_field_info; + uint64_t last_field_end = 0; + + last_field_end = + last_field_info.bit_offset + last_field_info.bit_size; + + if (!last_field_info.IsBitfield()) { + // The last field was not a bit-field... + // but if it did take up the entire word then we need to extend + // last_field_end so the bit-field does not step into the last + // fields padding. + if (last_field_end != 0 && ((last_field_end % word_width) != 0)) + last_field_end += word_width - (last_field_end % word_width); } - last_field_info = this_field_info; - last_field_info.SetIsBitfield(true); - } else { - last_field_info.bit_offset = field_bit_offset; + // If we have a gap between the last_field_end and the current + // field we have an unnamed bit-field. + // If we have a base class, we assume there is no unnamed + // bit-field if this is the first field since the gap can be + // attributed to the members from the base class. This assumption + // is not correct if the first field of the derived class is + // indeed an unnamed bit-field. We currently do not have the + // machinary to track the offset of the last field of classes we + // have seen before, so we are not handling this case. 
+ if (this_field_info.bit_offset != last_field_end && + this_field_info.bit_offset > last_field_end && + !(last_field_info.bit_offset == 0 && + last_field_info.bit_size == 0 && + layout_info.base_offsets.size() != 0)) { + unnamed_field_info = FieldInfo{}; + unnamed_field_info->bit_size = + this_field_info.bit_offset - last_field_end; + unnamed_field_info->bit_offset = last_field_end; + } + + if (unnamed_field_info) { + clang::FieldDecl *unnamed_bitfield_decl = + TypeSystemClang::AddFieldToRecordType( + class_clang_type, llvm::StringRef(), + m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint, + word_width), + accessibility, unnamed_field_info->bit_size); - if (llvm::Optional clang_type_size = - member_clang_type.GetByteSize(nullptr)) { - last_field_info.bit_size = *clang_type_size * character_width; + layout_info.field_offsets.insert(std::make_pair( + unnamed_bitfield_decl, unnamed_field_info->bit_offset)); } + } - last_field_info.SetIsBitfield(false); + last_field_info = this_field_info; + last_field_info.SetIsBitfield(true); + } else { + last_field_info.bit_offset = field_bit_offset; + + if (llvm::Optional clang_type_size = + member_clang_type.GetByteSize(nullptr)) { + last_field_info.bit_size = *clang_type_size * character_width; } - if (!member_clang_type.IsCompleteType()) - member_clang_type.GetCompleteType(); - - { - // Older versions of clang emit array[0] and array[1] in the - // same way (). If the current field - // is at the end of the structure, then there is definitely no - // room for extra elements and we override the type to - // array[0]. 
- - CompilerType member_array_element_type; - uint64_t member_array_size; - bool member_array_is_incomplete; - - if (member_clang_type.IsArrayType(&member_array_element_type, - &member_array_size, - &member_array_is_incomplete) && - !member_array_is_incomplete) { - uint64_t parent_byte_size = - parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, - UINT64_MAX); - - if (member_byte_offset >= parent_byte_size) { - if (member_array_size != 1 && - (member_array_size != 0 || - member_byte_offset > parent_byte_size)) { - module_sp->ReportError( - "0x%8.8" PRIx64 - ": DW_TAG_member '%s' refers to type 0x%8.8x" - " which extends beyond the bounds of 0x%8.8" PRIx64, - die.GetID(), name, encoding_form.Reference().GetOffset(), - parent_die.GetID()); - } + last_field_info.SetIsBitfield(false); + } - member_clang_type = - m_ast.CreateArrayType(member_array_element_type, 0, false); + if (!member_clang_type.IsCompleteType()) + member_clang_type.GetCompleteType(); + + { + // Older versions of clang emit array[0] and array[1] in the + // same way (). If the current field + // is at the end of the structure, then there is definitely no + // room for extra elements and we override the type to + // array[0]. 
+ + CompilerType member_array_element_type; + uint64_t member_array_size; + bool member_array_is_incomplete; + + if (member_clang_type.IsArrayType(&member_array_element_type, + &member_array_size, + &member_array_is_incomplete) && + !member_array_is_incomplete) { + uint64_t parent_byte_size = + parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, + UINT64_MAX); + + if (member_byte_offset >= parent_byte_size) { + if (member_array_size != 1 && + (member_array_size != 0 || + member_byte_offset > parent_byte_size)) { + module_sp->ReportError( + "0x%8.8" PRIx64 + ": DW_TAG_member '%s' refers to type 0x%8.8x" + " which extends beyond the bounds of 0x%8.8" PRIx64, + die.GetID(), name, encoding_form.Reference().GetOffset(), + parent_die.GetID()); } + + member_clang_type = + m_ast.CreateArrayType(member_array_element_type, 0, false); } } + } - CompleteType(member_clang_type); + CompleteType(member_clang_type); - field_decl = TypeSystemClang::AddFieldToRecordType( - class_clang_type, name, member_clang_type, accessibility, - bit_size); + field_decl = TypeSystemClang::AddFieldToRecordType( + class_clang_type, name, member_clang_type, accessibility, + bit_size); - m_ast.SetMetadataAsUserID(field_decl, die.GetID()); + m_ast.SetMetadataAsUserID(field_decl, die.GetID()); - layout_info.field_offsets.insert( - std::make_pair(field_decl, field_bit_offset)); - } else { - if (name) - module_sp->ReportError( - "0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x" - " which was unable to be parsed", - die.GetID(), name, encoding_form.Reference().GetOffset()); - else - module_sp->ReportError( - "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x" - " which was unable to be parsed", - die.GetID(), encoding_form.Reference().GetOffset()); - } + layout_info.field_offsets.insert( + std::make_pair(field_decl, field_bit_offset)); + } else { + if (name) + module_sp->ReportError( + "0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x" + " which was unable to be parsed", + 
die.GetID(), name, encoding_form.Reference().GetOffset()); + else + module_sp->ReportError( + "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x" + " which was unable to be parsed", + die.GetID(), encoding_form.Reference().GetOffset()); } + } - if (prop_name != nullptr && member_type) { - clang::ObjCIvarDecl *ivar_decl = nullptr; + if (prop_name != nullptr && member_type) { + clang::ObjCIvarDecl *ivar_decl = nullptr; - if (field_decl) { - ivar_decl = clang::dyn_cast(field_decl); - assert(ivar_decl != nullptr); - } + if (field_decl) { + ivar_decl = clang::dyn_cast(field_decl); + assert(ivar_decl != nullptr); + } - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); - delayed_properties.push_back(DelayedAddObjCClassProperty( - class_clang_type, prop_name, member_type->GetLayoutCompilerType(), - ivar_decl, prop_setter_name, prop_getter_name, prop_attributes, - &metadata)); + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); + delayed_properties.push_back(DelayedAddObjCClassProperty( + class_clang_type, prop_name, member_type->GetLayoutCompilerType(), + ivar_decl, prop_setter_name, prop_getter_name, prop_attributes, + &metadata)); - if (ivar_decl) - m_ast.SetMetadataAsUserID(ivar_decl, die.GetID()); - } + if (ivar_decl) + m_ast.SetMetadataAsUserID(ivar_decl, die.GetID()); } } } bool DWARFASTParserClang::ParseChildMembers( const DWARFDIE &parent_die, CompilerType &class_clang_type, - const LanguageType class_language, std::vector> &base_classes, std::vector &member_accessibilities, std::vector &member_function_dies, @@ -2778,7 +2777,7 @@ bool DWARFASTParserClang::ParseChildMembers( switch (tag) { case DW_TAG_member: case DW_TAG_APPLE_property: - ParseSingleMember(die, parent_die, class_clang_type, class_language, + ParseSingleMember(die, parent_die, class_clang_type, member_accessibilities, default_accessibility, delayed_properties, layout_info, last_field_info); break; @@ -2868,7 +2867,7 @@ bool DWARFASTParserClang::ParseChildMembers( 
CompilerType base_class_clang_type = base_class_type->GetFullCompilerType(); assert(base_class_clang_type); - if (class_language == eLanguageTypeObjC) { + if (TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type)) { ast->SetObjCSuperClass(class_clang_type, base_class_clang_type); } else { std::unique_ptr result = diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index cb718a207d2d4..2ef49abc1da16 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -110,7 +110,6 @@ class DWARFASTParserClang : public DWARFASTParser { bool ParseChildMembers( const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, - const lldb::LanguageType class_language, std::vector> &base_classes, std::vector &member_accessibilities, std::vector &member_function_dies, @@ -195,7 +194,6 @@ class DWARFASTParserClang : public DWARFASTParser { void ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, - const lldb::LanguageType class_language, std::vector &member_accessibilities, lldb::AccessType default_accessibility, DelayedPropertyList &delayed_properties, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 9f64e5255fd5c..0b7e31ae2d1df 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1036,18 +1036,20 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) { // FIXME: Rather than parsing the whole line table and then copying it over // into LLDB, we should explore using a callback to populate the line table // while we parse to reduce memory usage. 
- std::unique_ptr sequence = - LineTable::CreateLineSequenceContainer(); std::vector> sequences; - for (auto &row : line_table->Rows) { - LineTable::AppendLineEntryToSequence( - sequence.get(), row.Address.Address, row.Line, row.Column, row.File, - row.IsStmt, row.BasicBlock, row.PrologueEnd, row.EpilogueBegin, - row.EndSequence); - if (row.EndSequence) { - sequences.push_back(std::move(sequence)); - sequence = LineTable::CreateLineSequenceContainer(); + // The Sequences view contains only valid line sequences. Don't iterate over + // the Rows directly. + for (const llvm::DWARFDebugLine::Sequence &seq : line_table->Sequences) { + std::unique_ptr sequence = + LineTable::CreateLineSequenceContainer(); + for (unsigned idx = seq.FirstRowIndex; idx < seq.LastRowIndex; ++idx) { + const llvm::DWARFDebugLine::Row &row = line_table->Rows[idx]; + LineTable::AppendLineEntryToSequence( + sequence.get(), row.Address.Address, row.Line, row.Column, row.File, + row.IsStmt, row.BasicBlock, row.PrologueEnd, row.EpilogueBegin, + row.EndSequence); } + sequences.push_back(std::move(sequence)); } std::unique_ptr line_table_up = diff --git a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.h b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.h index 7d2de4ac29ac2..b15ea74d07dbb 100644 --- a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.h +++ b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.h @@ -33,9 +33,9 @@ class SymbolVendorMacOSX : public lldb_private::SymbolVendor { virtual ~SymbolVendorMacOSX(); // PluginInterface protocol - virtual lldb_private::ConstString GetPluginName(); + lldb_private::ConstString GetPluginName() override; - virtual uint32_t GetPluginVersion(); + uint32_t GetPluginVersion() override; private: SymbolVendorMacOSX(const SymbolVendorMacOSX &) = delete; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index bc06ea8164d43..8825b473cd33d 100644 
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -2499,6 +2499,7 @@ RemoveWrappingTypes(QualType type, ArrayRef mask = {}) { case clang::Type::Decltype: case clang::Type::Elaborated: case clang::Type::Paren: + case clang::Type::TemplateSpecialization: case clang::Type::Typedef: case clang::Type::TypeOf: case clang::Type::TypeOfExpr: diff --git a/lldb/source/Symbol/FuncUnwinders.cpp b/lldb/source/Symbol/FuncUnwinders.cpp index 30266120d05e9..9a2671a08e86c 100644 --- a/lldb/source/Symbol/FuncUnwinders.cpp +++ b/lldb/source/Symbol/FuncUnwinders.cpp @@ -183,11 +183,11 @@ class RegisterContextToInfo: public SymbolFile::RegisterInfoResolver { public: RegisterContextToInfo(RegisterContext &ctx) : m_ctx(ctx) {} - const RegisterInfo *ResolveName(llvm::StringRef name) const { + const RegisterInfo *ResolveName(llvm::StringRef name) const override { return m_ctx.GetRegisterInfoByName(name); } const RegisterInfo *ResolveNumber(lldb::RegisterKind kind, - uint32_t number) const { + uint32_t number) const override { return m_ctx.GetRegisterInfo(kind, number); } diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 5a4680b011034..6b552dd0c19ef 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -47,8 +47,8 @@ ObjectFile::FindPlugin(const lldb::ModuleSP &module_sp, const FileSpec *file, FileSpec archive_file; ObjectContainerCreateInstance create_object_container_callback; - const bool file_exists = FileSystem::Instance().Exists(*file); if (!data_sp) { + const bool file_exists = FileSystem::Instance().Exists(*file); // We have an object name which most likely means we have a .o file in // a static archive (.a file). 
Try and see if we have a cached archive // first without reading any data first @@ -207,9 +207,11 @@ ObjectFileSP ObjectFile::FindPlugin(const lldb::ModuleSP &module_sp, size_t ObjectFile::GetModuleSpecifications(const FileSpec &file, lldb::offset_t file_offset, lldb::offset_t file_size, - ModuleSpecList &specs) { - DataBufferSP data_sp = - FileSystem::Instance().CreateDataBuffer(file.GetPath(), 512, file_offset); + ModuleSpecList &specs, + DataBufferSP data_sp) { + if (!data_sp) + data_sp = FileSystem::Instance().CreateDataBuffer(file.GetPath(), 512, + file_offset); if (data_sp) { if (file_size == 0) { const lldb::offset_t actual_file_size = diff --git a/lldb/source/Target/AssertFrameRecognizer.cpp b/lldb/source/Target/AssertFrameRecognizer.cpp index d87459ac2fdd4..fe5fa3a362f82 100644 --- a/lldb/source/Target/AssertFrameRecognizer.cpp +++ b/lldb/source/Target/AssertFrameRecognizer.cpp @@ -86,20 +86,17 @@ bool GetAssertLocation(llvm::Triple::OSType os, SymbolLocation &location) { } void RegisterAssertFrameRecognizer(Process *process) { - static llvm::once_flag g_once_flag; - llvm::call_once(g_once_flag, [process]() { - Target &target = process->GetTarget(); - llvm::Triple::OSType os = target.GetArchitecture().GetTriple().getOS(); - SymbolLocation location; - - if (!GetAbortLocation(os, location)) - return; - - StackFrameRecognizerManager::AddRecognizer( - StackFrameRecognizerSP(new AssertFrameRecognizer()), - location.module_spec.GetFilename(), location.symbols, - /*first_instruction_only*/ false); - }); + Target &target = process->GetTarget(); + llvm::Triple::OSType os = target.GetArchitecture().GetTriple().getOS(); + SymbolLocation location; + + if (!GetAbortLocation(os, location)) + return; + + target.GetFrameRecognizerManager().AddRecognizer( + StackFrameRecognizerSP(new AssertFrameRecognizer()), + location.module_spec.GetFilename(), location.symbols, + /*first_instruction_only*/ false); } } // namespace lldb_private diff --git 
a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index 8d4bea2148147..16787141bee0a 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -12,9 +12,6 @@ #include #include -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" - #include "lldb/Breakpoint/BreakpointIDList.h" #include "lldb/Breakpoint/BreakpointLocation.h" #include "lldb/Core/Debugger.h" @@ -40,8 +37,8 @@ #include "lldb/Utility/Log.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/StructuredData.h" - #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" // Define these constants from POSIX mman.h rather than include the file so // that they will be correct even when compiled on Linux. @@ -1774,9 +1771,23 @@ Status Platform::UnloadImage(lldb_private::Process *process, lldb::ProcessSP Platform::ConnectProcess(llvm::StringRef connect_url, llvm::StringRef plugin_name, - lldb_private::Debugger &debugger, - lldb_private::Target *target, - lldb_private::Status &error) { + Debugger &debugger, Target *target, + Status &error) { + return DoConnectProcess(connect_url, plugin_name, debugger, nullptr, target, + error); +} + +lldb::ProcessSP Platform::ConnectProcessSynchronous( + llvm::StringRef connect_url, llvm::StringRef plugin_name, + Debugger &debugger, Stream &stream, Target *target, Status &error) { + return DoConnectProcess(connect_url, plugin_name, debugger, &stream, target, + error); +} + +lldb::ProcessSP Platform::DoConnectProcess(llvm::StringRef connect_url, + llvm::StringRef plugin_name, + Debugger &debugger, Stream *stream, + Target *target, Status &error) { error.Clear(); if (!target) { @@ -1803,12 +1814,34 @@ lldb::ProcessSP Platform::ConnectProcess(llvm::StringRef connect_url, lldb::ProcessSP process_sp = target->CreateProcess(debugger.GetListener(), plugin_name, nullptr); + if (!process_sp) return nullptr; - error = process_sp->ConnectRemote(&debugger.GetOutputStream(), connect_url); - if (error.Fail()) + // 
If this private method is called with a stream we are synchronous. + const bool synchronous = stream != nullptr; + + ListenerSP listener_sp( + Listener::MakeListener("lldb.Process.ConnectProcess.hijack")); + if (synchronous) + process_sp->HijackProcessEvents(listener_sp); + + error = process_sp->ConnectRemote(connect_url); + if (error.Fail()) { + if (synchronous) + process_sp->RestoreProcessEvents(); return nullptr; + } + + if (synchronous) { + EventSP event_sp; + process_sp->WaitForProcessToStop(llvm::None, &event_sp, true, listener_sp, + nullptr); + process_sp->RestoreProcessEvents(); + bool pop_process_io_handler = false; + Process::HandleProcessStateChangedEvent(event_sp, stream, + pop_process_io_handler); + } return process_sp; } diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 78f75981a94dd..d777a27139119 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -1270,7 +1270,7 @@ void Process::UpdateThreadListIfNeeded() { for (size_t i = 0; i < num_old_threads; ++i) old_thread_list.GetThreadAtIndex(i, false)->ClearBackingThread(); // See if the OS plugin reports all threads. If it does, then - // it is safe to clear unseen thread's plans here. Otherwise we + // it is safe to clear unseen thread's plans here. Otherwise we // should preserve them in case they show up again: clear_unused_threads = GetOSPluginReportsAllThreads(); @@ -3096,14 +3096,14 @@ void Process::CompleteAttach() { } } -Status Process::ConnectRemote(Stream *strm, llvm::StringRef remote_url) { +Status Process::ConnectRemote(llvm::StringRef remote_url) { m_abi_sp.reset(); m_process_input_reader.reset(); // Find the process and its architecture. Make sure it matches the // architecture of the current Target, and if not adjust it. 
- Status error(DoConnectRemote(strm, remote_url)); + Status error(DoConnectRemote(remote_url)); if (error.Success()) { if (GetID() != LLDB_INVALID_PROCESS_ID) { EventSP event_sp; diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 3d6cc5dc90b30..098aed9cd8125 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -1956,8 +1956,11 @@ bool StackFrame::GetStatus(Stream &strm, bool show_frame_info, bool show_source, RecognizedStackFrameSP StackFrame::GetRecognizedFrame() { if (!m_recognized_frame_sp) { - m_recognized_frame_sp = - StackFrameRecognizerManager::RecognizeFrame(CalculateStackFrame()); + m_recognized_frame_sp = GetThread() + ->GetProcess() + ->GetTarget() + .GetFrameRecognizerManager() + .RecognizeFrame(CalculateStackFrame()); } return m_recognized_frame_sp; } diff --git a/lldb/source/Target/StackFrameRecognizer.cpp b/lldb/source/Target/StackFrameRecognizer.cpp index 7dc6e9d1e490a..6fa09a387ad2e 100644 --- a/lldb/source/Target/StackFrameRecognizer.cpp +++ b/lldb/source/Target/StackFrameRecognizer.cpp @@ -6,12 +6,11 @@ // //===----------------------------------------------------------------------===// -#include +#include "lldb/Target/StackFrameRecognizer.h" #include "lldb/Core/Module.h" #include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Symbol/Symbol.h" #include "lldb/Target/StackFrame.h" -#include "lldb/Target/StackFrameRecognizer.h" #include "lldb/Utility/RegularExpression.h" using namespace lldb; @@ -48,158 +47,106 @@ ScriptedStackFrameRecognizer::RecognizeFrame(lldb::StackFrameSP frame) { new ScriptedRecognizedStackFrame(args_synthesized)); } -class StackFrameRecognizerManagerImpl { -public: - void AddRecognizer(StackFrameRecognizerSP recognizer, ConstString module, - llvm::ArrayRef symbols, - bool first_instruction_only) { - m_recognizers.push_front({(uint32_t)m_recognizers.size(), false, recognizer, - false, module, RegularExpressionSP(), symbols, - 
RegularExpressionSP(), first_instruction_only}); - } - - void AddRecognizer(StackFrameRecognizerSP recognizer, - RegularExpressionSP module, RegularExpressionSP symbol, - bool first_instruction_only) { - m_recognizers.push_front( - {(uint32_t)m_recognizers.size(), false, recognizer, true, ConstString(), - module, std::vector(), symbol, first_instruction_only}); - } - - void ForEach(std::function< - void(uint32_t recognized_id, std::string recognizer_name, - std::string module, llvm::ArrayRef symbols, - bool regexp)> const &callback) { - for (auto entry : m_recognizers) { - if (entry.is_regexp) { - std::string module_name; - std::string symbol_name; - - if (entry.module_regexp) - module_name = entry.module_regexp->GetText().str(); - if (entry.symbol_regexp) - symbol_name = entry.symbol_regexp->GetText().str(); - - callback(entry.recognizer_id, entry.recognizer->GetName(), module_name, - llvm::makeArrayRef(ConstString(symbol_name)), true); - - } else { - callback(entry.recognizer_id, entry.recognizer->GetName(), - entry.module.GetCString(), entry.symbols, false); - } - } - } - - bool RemoveRecognizerWithID(uint32_t recognizer_id) { - if (recognizer_id >= m_recognizers.size()) return false; - if (m_recognizers[recognizer_id].deleted) return false; - m_recognizers[recognizer_id].deleted = true; - return true; - } +void StackFrameRecognizerManager::AddRecognizer( + StackFrameRecognizerSP recognizer, ConstString module, + llvm::ArrayRef symbols, bool first_instruction_only) { + m_recognizers.push_front({(uint32_t)m_recognizers.size(), false, recognizer, + false, module, RegularExpressionSP(), symbols, + RegularExpressionSP(), first_instruction_only}); +} - void RemoveAllRecognizers() { - m_recognizers.clear(); - } +void StackFrameRecognizerManager::AddRecognizer( + StackFrameRecognizerSP recognizer, RegularExpressionSP module, + RegularExpressionSP symbol, bool first_instruction_only) { + m_recognizers.push_front( + {(uint32_t)m_recognizers.size(), false, recognizer, 
true, ConstString(), + module, std::vector(), symbol, first_instruction_only}); +} - StackFrameRecognizerSP GetRecognizerForFrame(StackFrameSP frame) { - const SymbolContext &symctx = frame->GetSymbolContext( - eSymbolContextModule | eSymbolContextFunction | eSymbolContextSymbol); - ConstString function_name = symctx.GetFunctionName(); - ModuleSP module_sp = symctx.module_sp; - if (!module_sp) return StackFrameRecognizerSP(); - ConstString module_name = module_sp->GetFileSpec().GetFilename(); - Symbol *symbol = symctx.symbol; - if (!symbol) return StackFrameRecognizerSP(); - Address start_addr = symbol->GetAddress(); - Address current_addr = frame->GetFrameCodeAddress(); - - for (auto entry : m_recognizers) { - if (entry.deleted) continue; - if (entry.module) - if (entry.module != module_name) continue; +void StackFrameRecognizerManager::ForEach( + const std::function, bool)> &callback) { + for (auto entry : m_recognizers) { + if (entry.is_regexp) { + std::string module_name; + std::string symbol_name; if (entry.module_regexp) - if (!entry.module_regexp->Execute(module_name.GetStringRef())) continue; - - if (!entry.symbols.empty()) - if (!llvm::is_contained(entry.symbols, function_name)) - continue; - + module_name = entry.module_regexp->GetText().str(); if (entry.symbol_regexp) - if (!entry.symbol_regexp->Execute(function_name.GetStringRef())) - continue; + symbol_name = entry.symbol_regexp->GetText().str(); - if (entry.first_instruction_only) - if (start_addr != current_addr) continue; + callback(entry.recognizer_id, entry.recognizer->GetName(), module_name, + llvm::makeArrayRef(ConstString(symbol_name)), true); - return entry.recognizer; + } else { + callback(entry.recognizer_id, entry.recognizer->GetName(), + entry.module.GetCString(), entry.symbols, false); } - return StackFrameRecognizerSP(); } - - RecognizedStackFrameSP RecognizeFrame(StackFrameSP frame) { - auto recognizer = GetRecognizerForFrame(frame); - if (!recognizer) return RecognizedStackFrameSP(); - 
return recognizer->RecognizeFrame(frame); - } - - private: - struct RegisteredEntry { - uint32_t recognizer_id; - bool deleted; - StackFrameRecognizerSP recognizer; - bool is_regexp; - ConstString module; - RegularExpressionSP module_regexp; - std::vector symbols; - RegularExpressionSP symbol_regexp; - bool first_instruction_only; - }; - - std::deque m_recognizers; -}; - -StackFrameRecognizerManagerImpl &GetStackFrameRecognizerManagerImpl() { - static StackFrameRecognizerManagerImpl instance = - StackFrameRecognizerManagerImpl(); - return instance; } -void StackFrameRecognizerManager::AddRecognizer( - StackFrameRecognizerSP recognizer, ConstString module, - llvm::ArrayRef symbols, bool first_instruction_only) { - GetStackFrameRecognizerManagerImpl().AddRecognizer( - recognizer, module, symbols, first_instruction_only); +bool StackFrameRecognizerManager::RemoveRecognizerWithID( + uint32_t recognizer_id) { + if (recognizer_id >= m_recognizers.size()) + return false; + if (m_recognizers[recognizer_id].deleted) + return false; + m_recognizers[recognizer_id].deleted = true; + return true; } -void StackFrameRecognizerManager::AddRecognizer( - StackFrameRecognizerSP recognizer, RegularExpressionSP module, - RegularExpressionSP symbol, bool first_instruction_only) { - GetStackFrameRecognizerManagerImpl().AddRecognizer(recognizer, module, symbol, - first_instruction_only); +void StackFrameRecognizerManager::RemoveAllRecognizers() { + m_recognizers.clear(); } -void StackFrameRecognizerManager::ForEach( - std::function symbols, - bool regexp)> const &callback) { - GetStackFrameRecognizerManagerImpl().ForEach(callback); -} +StackFrameRecognizerSP +StackFrameRecognizerManager::GetRecognizerForFrame(StackFrameSP frame) { + const SymbolContext &symctx = frame->GetSymbolContext( + eSymbolContextModule | eSymbolContextFunction | eSymbolContextSymbol); + ConstString function_name = symctx.GetFunctionName(); + ModuleSP module_sp = symctx.module_sp; + if (!module_sp) + return 
StackFrameRecognizerSP(); + ConstString module_name = module_sp->GetFileSpec().GetFilename(); + Symbol *symbol = symctx.symbol; + if (!symbol) + return StackFrameRecognizerSP(); + Address start_addr = symbol->GetAddress(); + Address current_addr = frame->GetFrameCodeAddress(); -void StackFrameRecognizerManager::RemoveAllRecognizers() { - GetStackFrameRecognizerManagerImpl().RemoveAllRecognizers(); -} + for (auto entry : m_recognizers) { + if (entry.deleted) + continue; + if (entry.module) + if (entry.module != module_name) + continue; -bool StackFrameRecognizerManager::RemoveRecognizerWithID(uint32_t recognizer_id) { - return GetStackFrameRecognizerManagerImpl().RemoveRecognizerWithID(recognizer_id); -} + if (entry.module_regexp) + if (!entry.module_regexp->Execute(module_name.GetStringRef())) + continue; -RecognizedStackFrameSP StackFrameRecognizerManager::RecognizeFrame( - StackFrameSP frame) { - return GetStackFrameRecognizerManagerImpl().RecognizeFrame(frame); + if (!entry.symbols.empty()) + if (!llvm::is_contained(entry.symbols, function_name)) + continue; + + if (entry.symbol_regexp) + if (!entry.symbol_regexp->Execute(function_name.GetStringRef())) + continue; + + if (entry.first_instruction_only) + if (start_addr != current_addr) + continue; + + return entry.recognizer; + } + return StackFrameRecognizerSP(); } -StackFrameRecognizerSP StackFrameRecognizerManager::GetRecognizerForFrame( - lldb::StackFrameSP frame) { - return GetStackFrameRecognizerManagerImpl().GetRecognizerForFrame(frame); +RecognizedStackFrameSP +StackFrameRecognizerManager::RecognizeFrame(StackFrameSP frame) { + auto recognizer = GetRecognizerForFrame(frame); + if (!recognizer) + return RecognizedStackFrameSP(); + return recognizer->RecognizeFrame(frame); } diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index dad56376005c6..364997f139b13 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -45,6 +45,7 @@ #include 
"lldb/Target/Process.h" #include "lldb/Target/SectionLoadList.h" #include "lldb/Target/StackFrame.h" +#include "lldb/Target/StackFrameRecognizer.h" #include "lldb/Target/SystemRuntime.h" #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadSpec.h" @@ -94,6 +95,8 @@ Target::Target(Debugger &debugger, const ArchSpec &target_arch, m_source_manager_up(), m_stop_hooks(), m_stop_hook_next_id(0), m_valid(true), m_suppress_stop_hooks(false), m_is_dummy_target(is_dummy_target), + m_frame_recognizer_manager_up( + std::make_unique()), m_stats_storage(static_cast(StatisticKind::StatisticMax)) { @@ -143,6 +146,9 @@ void Target::PrimeFromDummyTarget(Target *target) { BreakpointName *new_bp_name = new BreakpointName(*bp_name_entry.second); AddBreakpointName(new_bp_name); } + + m_frame_recognizer_manager_up = std::make_unique( + *target->m_frame_recognizer_manager_up); } void Target::Dump(Stream *s, lldb::DescriptionLevel description_level) { diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp index 87ac6c23892d8..6c48bbde532f9 100644 --- a/lldb/source/Utility/Scalar.cpp +++ b/lldb/source/Utility/Scalar.cpp @@ -24,6 +24,7 @@ using namespace lldb; using namespace lldb_private; using llvm::APFloat; +using llvm::APInt; namespace { enum class Category { Void, Integral, Float }; @@ -1002,116 +1003,60 @@ Status Scalar::SetValueFromCString(const char *value_str, Encoding encoding, error.SetErrorString("Invalid encoding."); break; - case eEncodingUint: - if (byte_size <= sizeof(uint64_t)) { - uint64_t uval64; - if (!llvm::to_integer(value_str, uval64)) - error.SetErrorStringWithFormat( - "'%s' is not a valid unsigned integer string value", value_str); - else if (!UIntValueIsValidForSize(uval64, byte_size)) - error.SetErrorStringWithFormat( - "value 0x%" PRIx64 " is too large to fit in a %" PRIu64 - " byte unsigned integer value", - uval64, static_cast(byte_size)); - else { - m_type = Scalar::GetValueTypeForUnsignedIntegerWithByteSize(byte_size); - switch 
(m_type) { - case e_uint: - m_integer = llvm::APInt(sizeof(uint_t) * 8, uval64, false); - break; - case e_ulong: - m_integer = llvm::APInt(sizeof(ulong_t) * 8, uval64, false); - break; - case e_ulonglong: - m_integer = llvm::APInt(sizeof(ulonglong_t) * 8, uval64, false); - break; - default: - error.SetErrorStringWithFormat( - "unsupported unsigned integer byte size: %" PRIu64 "", - static_cast(byte_size)); - break; - } - } - } else { - error.SetErrorStringWithFormat( - "unsupported unsigned integer byte size: %" PRIu64 "", - static_cast(byte_size)); - return error; - } - break; - case eEncodingSint: - if (byte_size <= sizeof(int64_t)) { - int64_t sval64; - if (!llvm::to_integer(value_str, sval64)) - error.SetErrorStringWithFormat( - "'%s' is not a valid signed integer string value", value_str); - else if (!SIntValueIsValidForSize(sval64, byte_size)) - error.SetErrorStringWithFormat( - "value 0x%" PRIx64 " is too large to fit in a %" PRIu64 - " byte signed integer value", - sval64, static_cast(byte_size)); - else { - m_type = Scalar::GetValueTypeForSignedIntegerWithByteSize(byte_size); - switch (m_type) { - case e_sint: - m_integer = llvm::APInt(sizeof(sint_t) * 8, sval64, true); - break; - case e_slong: - m_integer = llvm::APInt(sizeof(slong_t) * 8, sval64, true); - break; - case e_slonglong: - m_integer = llvm::APInt(sizeof(slonglong_t) * 8, sval64, true); - break; - default: - error.SetErrorStringWithFormat( - "unsupported signed integer byte size: %" PRIu64 "", - static_cast(byte_size)); - break; - } - } - } else { - error.SetErrorStringWithFormat( - "unsupported signed integer byte size: %" PRIu64 "", - static_cast(byte_size)); - return error; + case eEncodingUint: { + llvm::StringRef str = value_str; + bool is_signed = encoding == eEncodingSint; + bool is_negative = is_signed && str.consume_front("-"); + APInt integer; + if (str.getAsInteger(0, integer)) { + error.SetErrorStringWithFormatv( + "'{0}' is not a valid integer string value", value_str); + break; + 
} + bool fits; + if (is_signed) { + integer = integer.zext(integer.getBitWidth() + 1); + if (is_negative) + integer.negate(); + fits = integer.isSignedIntN(byte_size * 8); + } else + fits = integer.isIntN(byte_size * 8); + if (!fits) { + error.SetErrorStringWithFormatv( + "value {0} is too large to fit in a {1} byte integer value", + value_str, byte_size); + break; + } + m_type = GetBestTypeForBitSize(8 * byte_size, is_signed); + if (m_type == e_void) { + error.SetErrorStringWithFormatv("unsupported integer byte size: {0}", + byte_size); + break; } + if (is_signed) + m_integer = integer.sextOrTrunc(GetBitSize(m_type)); + else + m_integer = integer.zextOrTrunc(GetBitSize(m_type)); break; + } - case eEncodingIEEE754: - static float f_val; - static double d_val; - static long double l_val; - if (byte_size == sizeof(float)) { - if (::sscanf(value_str, "%f", &f_val) == 1) { - m_float = llvm::APFloat(f_val); - m_type = e_float; - } else - error.SetErrorStringWithFormat("'%s' is not a valid float string value", - value_str); - } else if (byte_size == sizeof(double)) { - if (::sscanf(value_str, "%lf", &d_val) == 1) { - m_float = llvm::APFloat(d_val); - m_type = e_double; - } else - error.SetErrorStringWithFormat("'%s' is not a valid float string value", - value_str); - } else if (byte_size == sizeof(long double)) { - if (::sscanf(value_str, "%Lf", &l_val) == 1) { - m_float = llvm::APFloat( - llvm::APFloat::x87DoubleExtended(), - llvm::APInt(BITWIDTH_INT128, NUM_OF_WORDS_INT128, - (reinterpret_cast(&l_val))->x)); - m_type = e_long_double; - } else - error.SetErrorStringWithFormat("'%s' is not a valid float string value", - value_str); - } else { - error.SetErrorStringWithFormat("unsupported float byte size: %" PRIu64 "", - static_cast(byte_size)); - return error; + case eEncodingIEEE754: { + Type type = GetValueTypeForFloatWithByteSize(byte_size); + if (type == e_void) { + error.SetErrorStringWithFormatv("unsupported float byte size: {0}", + byte_size); + break; } + APFloat 
f(GetFltSemantics(type)); + if (llvm::Expected op = + f.convertFromString(value_str, APFloat::rmNearestTiesToEven)) { + m_type = type; + m_float = std::move(f); + } else + error = op.takeError(); break; + } case eEncodingVector: error.SetErrorString("vector encoding unsupported."); diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index 9aad9fc750cae..34f3522c8dfec 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -49,6 +49,7 @@ set(LLDB_DEFAULT_TEST_EXECUTABLE "${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb${CMAKE_EXEC # Set the paths to default llvm tools. set(LLDB_DEFAULT_TEST_DSYMUTIL "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/dsymutil${CMAKE_EXECUTABLE_SUFFIX}") set(LLDB_DEFAULT_TEST_FILECHECK "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/FileCheck${CMAKE_EXECUTABLE_SUFFIX}") +set(LLDB_DEFAULT_TEST_YAML2OBJ "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/yaml2obj${CMAKE_EXECUTABLE_SUFFIX}") if (TARGET clang) set(LLDB_DEFAULT_TEST_COMPILER "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/clang${CMAKE_EXECUTABLE_SUFFIX}") @@ -60,6 +61,7 @@ set(LLDB_TEST_EXECUTABLE "${LLDB_DEFAULT_TEST_EXECUTABLE}" CACHE PATH "lldb exec set(LLDB_TEST_COMPILER "${LLDB_DEFAULT_TEST_COMPILER}" CACHE PATH "C Compiler to use for building LLDB test inferiors") set(LLDB_TEST_DSYMUTIL "${LLDB_DEFAULT_TEST_DSYMUTIL}" CACHE PATH "dsymutil used for generating dSYM bundles") set(LLDB_TEST_FILECHECK "${LLDB_DEFAULT_TEST_FILECHECK}" CACHE PATH "FileCheck used for testing purposes") +set(LLDB_TEST_YAML2OBJ "${LLDB_DEFAULT_TEST_YAML2OBJ}" CACHE PATH "yaml2obj used for testing purposes") if ("${LLDB_TEST_COMPILER}" STREQUAL "") message(FATAL_ERROR "LLDB test compiler not specified. 
Tests will not run.") @@ -145,6 +147,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_FILECHECK "${LLDB_TEST_FILECHECK}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_YAML2OBJ "${LLDB_TEST_YAML2OBJ}") # Remaining ones must be paths to the provided LLVM build-tree. if(LLVM_CONFIGURATION_TYPES) @@ -172,6 +175,7 @@ string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_EXECUTAB string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_FILECHECK "${LLDB_TEST_FILECHECK}") +string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_YAML2OBJ "${LLDB_TEST_YAML2OBJ}") # Configure the API test suite. configure_lit_site_cfg( diff --git a/lldb/test/API/commands/frame/recognizer/TestFrameRecognizer.py b/lldb/test/API/commands/frame/recognizer/TestFrameRecognizer.py index 218c7e85aae74..f807937f2f17b 100644 --- a/lldb/test/API/commands/frame/recognizer/TestFrameRecognizer.py +++ b/lldb/test/API/commands/frame/recognizer/TestFrameRecognizer.py @@ -145,6 +145,50 @@ def test_frame_recognizer_multi_symbol(self): self.expect("frame recognizer info 0", substrs=['frame 0 is recognized by recognizer.MyFrameRecognizer']) + @skipUnlessDarwin + def test_frame_recognizer_target_specific(self): + self.build() + exe = self.getBuildArtifact("a.out") + + # Clear internal & plugins recognizers that get initialized at launch + self.runCmd("frame recognizer clear") + + # Create a target. 
+ target, process, thread, _ = lldbutil.run_to_name_breakpoint(self, "foo", + exe_name = exe) + + self.runCmd("command script import " + os.path.join(self.getSourceDir(), "recognizer.py")) + + # Check that this doesn't contain our own FrameRecognizer somehow. + self.expect("frame recognizer list", + matching=False, substrs=['MyFrameRecognizer']) + + # Add a frame recognizer in that target. + self.runCmd("frame recognizer add -l recognizer.MyFrameRecognizer -s a.out -n foo -n bar") + + self.expect("frame recognizer list", + substrs=['recognizer.MyFrameRecognizer, module a.out, symbol foo, symbol bar']) + + self.expect("frame recognizer info 0", + substrs=['frame 0 is recognized by recognizer.MyFrameRecognizer']) + + # Create a second target. That one shouldn't have the frame recognizer. + target, process, thread, _ = lldbutil.run_to_name_breakpoint(self, "bar", + exe_name = exe) + + self.expect("frame recognizer info 0", + substrs=['frame 0 not recognized by any recognizer']) + + # Add a frame recognizer to the new target. + self.runCmd("frame recognizer add -l recognizer.MyFrameRecognizer -s a.out -n bar") + + self.expect("frame recognizer list", + substrs=['recognizer.MyFrameRecognizer, module a.out, symbol bar']) + + # Now the new target should also recognize the frame. 
+ self.expect("frame recognizer info 0", + substrs=['frame 0 is recognized by recognizer.MyFrameRecognizer']) + @no_debug_info_test def test_frame_recognizer_delete_invalid_arg(self): self.expect("frame recognizer delete a", error=True, diff --git a/lldb/test/API/commands/platform/process/list/TestProcessList.py b/lldb/test/API/commands/platform/process/list/TestProcessList.py index ba0193ab1a687..9fc84d4f26e0f 100644 --- a/lldb/test/API/commands/platform/process/list/TestProcessList.py +++ b/lldb/test/API/commands/platform/process/list/TestProcessList.py @@ -25,7 +25,6 @@ def test_process_list_with_args(self): # Spawn a new process popen = self.spawnSubprocess(exe, args=["arg1", "--arg2", "arg3"]) - self.addTearDownHook(self.cleanupSubprocesses) substrs = [str(popen.pid), "TestProcess arg1 --arg2 arg3"] diff --git a/lldb/test/API/commands/process/attach-resume/TestAttachResume.py b/lldb/test/API/commands/process/attach-resume/TestAttachResume.py index 48a281e096a93..ff1bb8c6921d2 100644 --- a/lldb/test/API/commands/process/attach-resume/TestAttachResume.py +++ b/lldb/test/API/commands/process/attach-resume/TestAttachResume.py @@ -33,7 +33,6 @@ def process_attach_continue_interrupt_detach(self): exe = self.getBuildArtifact(exe_name) popen = self.spawnSubprocess(exe) - self.addTearDownHook(self.cleanupSubprocesses) self.runCmd("process attach -p " + str(popen.pid)) diff --git a/lldb/test/API/commands/process/attach/TestProcessAttach.py b/lldb/test/API/commands/process/attach/TestProcessAttach.py index f9b273309956c..4e61675c6fc58 100644 --- a/lldb/test/API/commands/process/attach/TestProcessAttach.py +++ b/lldb/test/API/commands/process/attach/TestProcessAttach.py @@ -29,7 +29,6 @@ def test_attach_to_process_by_id(self): # Spawn a new process popen = self.spawnSubprocess(exe) - self.addTearDownHook(self.cleanupSubprocesses) self.runCmd("process attach -p " + str(popen.pid)) @@ -55,7 +54,6 @@ def test_attach_to_process_from_different_dir_by_id(self): # Spawn a new 
process popen = self.spawnSubprocess(exe) - self.addTearDownHook(self.cleanupSubprocesses) os.chdir(newdir) self.addTearDownHook(lambda: os.chdir(testdir)) @@ -74,7 +72,6 @@ def test_attach_to_process_by_name(self): # Spawn a new process popen = self.spawnSubprocess(exe) - self.addTearDownHook(self.cleanupSubprocesses) self.runCmd("process attach -n " + exe_name) diff --git a/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py b/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py index a7565ccfeb75a..4a2b0b7cf817d 100644 --- a/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py +++ b/lldb/test/API/commands/process/attach/attach_denied/TestAttachDenied.py @@ -37,7 +37,6 @@ def test_attach_to_process_by_id_denied(self): # Spawn a new process popen = self.spawnSubprocess(exe, [pid_file_path]) - self.addTearDownHook(self.cleanupSubprocesses) pid = lldbutil.wait_for_file_on_target(self, pid_file_path) diff --git a/lldb/test/API/commands/register/register/register_command/TestRegisters.py b/lldb/test/API/commands/register/register/register_command/TestRegisters.py index b0931a7d6977c..9441483816c5f 100644 --- a/lldb/test/API/commands/register/register/register_command/TestRegisters.py +++ b/lldb/test/API/commands/register/register/register_command/TestRegisters.py @@ -457,7 +457,6 @@ def convenience_registers_with_process_attach(self, test_16bit_regs): # Spawn a new process pid = self.spawnSubprocess(exe, ['wait_for_attach']).pid - self.addTearDownHook(self.cleanupSubprocesses) if self.TraceOn(): print("pid of spawned process: %d" % pid) diff --git a/lldb/test/API/commands/target/auto-install-main-executable/TestAutoInstallMainExecutable.py b/lldb/test/API/commands/target/auto-install-main-executable/TestAutoInstallMainExecutable.py index ae5822750d694..8ab84bd3203e4 100644 --- a/lldb/test/API/commands/target/auto-install-main-executable/TestAutoInstallMainExecutable.py +++ 
b/lldb/test/API/commands/target/auto-install-main-executable/TestAutoInstallMainExecutable.py @@ -48,7 +48,6 @@ def test_target_auto_install_main_executable(self): self.debug_monitor_exe, commandline_args, install_remote=False) - self.addTearDownHook(self.cleanupSubprocesses) # Wait for the new process gets ready. time.sleep(0.1) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSDate.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSDate.py index 61394c05f5d56..cdce4798e9863 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSDate.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSDate.py @@ -67,3 +67,6 @@ def nsdate_data_formatter_commands(self): substrs=[ '(CFMutableBitVectorRef) mut_bv = ', '1110 0110 1011 0000 1101 1010 1000 1111 0011 0101 1101 0001 00']) + + self.expect_expr("distant_past", result_summary="0001-01-01 00:00:00 UTC") + self.expect_expr("distant_future", result_summary="4001-01-01 00:00:00 UTC") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m index a44a7837f7713..169b3aed4f222 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m @@ -663,6 +663,9 @@ int main(int argc, const char *argv[]) { NSDate *date_1970_plus_05 = [NSDate dateWithTimeIntervalSince1970:0.5]; NSDate *date_1970_plus_04 = [NSDate dateWithTimeIntervalSince1970:0.4]; + NSDate *distant_past = [NSDate distantPast]; + NSDate *distant_future = [NSDate distantFuture]; + CFAbsoluteTime date1_abs = CFDateGetAbsoluteTime(date1); CFAbsoluteTime date2_abs = CFDateGetAbsoluteTime(date2); CFAbsoluteTime date3_abs = CFDateGetAbsoluteTime(date3); diff --git 
a/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py b/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py index 78f3feae6ff63..c6ab4150a6bfd 100644 --- a/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py +++ b/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py @@ -35,7 +35,6 @@ def test(self): # Spawn a new process popen = self.spawnSubprocess(exe, [pid_file_path]) - self.addTearDownHook(self.cleanupSubprocesses) # Wait until process has fully started up. pid = lldbutil.wait_for_file_on_target(self, pid_file_path) diff --git a/lldb/test/API/functionalities/dlopen_other_executable/Makefile b/lldb/test/API/functionalities/dlopen_other_executable/Makefile new file mode 100644 index 0000000000000..113b9fd7d3f18 --- /dev/null +++ b/lldb/test/API/functionalities/dlopen_other_executable/Makefile @@ -0,0 +1,8 @@ +C_SOURCES := main.c +USE_LIBDL := 1 + +other: + $(MAKE) -f $(MAKEFILE_RULES) C_SOURCES=other.c EXE=other +all: other + +include Makefile.rules diff --git a/lldb/test/API/functionalities/dlopen_other_executable/TestDlopenOtherExecutable.py b/lldb/test/API/functionalities/dlopen_other_executable/TestDlopenOtherExecutable.py new file mode 100644 index 0000000000000..2ccfaeaea41af --- /dev/null +++ b/lldb/test/API/functionalities/dlopen_other_executable/TestDlopenOtherExecutable.py @@ -0,0 +1,42 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipIfRemote + @skipIfWindows + # glibc's dlopen doesn't support opening executables. + # https://sourceware.org/bugzilla/show_bug.cgi?id=11754 + @skipIfLinux + @no_debug_info_test + def test(self): + self.build() + # Launch and stop before the dlopen call. 
+ lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.c")) + + # Delete the breakpoint we no longer need. + self.target().DeleteAllBreakpoints() + + # Check that the executable is the test binary. + self.assertEqual(self.target().GetExecutable().GetFilename(), "a.out") + + # Continue so that dlopen is called. + breakpoint = self.target().BreakpointCreateBySourceRegex( + "// break after dlopen", lldb.SBFileSpec("main.c")) + self.assertNotEqual(breakpoint.GetNumResolvedLocations(), 0) + stopped_threads = lldbutil.continue_to_breakpoint(self.process(), breakpoint) + self.assertEqual(len(stopped_threads), 1) + + # Check that the executable is still the test binary and not "other". + self.assertEqual(self.target().GetExecutable().GetFilename(), "a.out") + + # Kill the process and run the program again. + err = self.process().Kill() + self.assertTrue(err.Success(), str(err)) + + # Test that we hit the breakpoint after dlopen. + lldbutil.run_to_breakpoint_do_run(self, self.target(), breakpoint) diff --git a/lldb/test/API/functionalities/dlopen_other_executable/main.c b/lldb/test/API/functionalities/dlopen_other_executable/main.c new file mode 100644 index 0000000000000..8f21e862a2b58 --- /dev/null +++ b/lldb/test/API/functionalities/dlopen_other_executable/main.c @@ -0,0 +1,10 @@ +#include +#include + +int main() { + int i = 0; // break here + // dlopen the 'other' test executable. 
+ int h = dlopen("other", RTLD_LAZY); + assert(h && "dlopen failed?"); + return i; // break after dlopen +} diff --git a/lldb/test/API/functionalities/dlopen_other_executable/other.c b/lldb/test/API/functionalities/dlopen_other_executable/other.c new file mode 100644 index 0000000000000..237c8ce181774 --- /dev/null +++ b/lldb/test/API/functionalities/dlopen_other_executable/other.c @@ -0,0 +1 @@ +int main() {} diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestProcessConnect.py b/lldb/test/API/functionalities/gdb_remote_client/TestProcessConnect.py new file mode 100644 index 0000000000000..14891b24249b5 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/TestProcessConnect.py @@ -0,0 +1,61 @@ +import lldb +import binascii +import os +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from gdbclientutils import * + + +class TestProcessConnect(GDBRemoteTestBase): + + NO_DEBUG_INFO_TESTCASE = True + + @skipIfWindows + def test_gdb_remote_sync(self): + """Test the gdb-remote command in synchronous mode""" + try: + self.dbg.SetAsync(False) + self.expect("gdb-remote %d" % self.server.port, + substrs=['Process', 'stopped']) + finally: + self.dbg.GetSelectedPlatform().DisconnectRemote() + + @skipIfWindows + @skipIfReproducer # Reproducer don't support async. 
+ def test_gdb_remote_async(self): + """Test the gdb-remote command in asynchronous mode""" + try: + self.dbg.SetAsync(True) + self.expect("gdb-remote %d" % self.server.port, + matching=False, + substrs=['Process', 'stopped']) + lldbutil.expect_state_changes(self, self.dbg.GetListener(), + self.process(), [lldb.eStateStopped]) + finally: + self.dbg.GetSelectedPlatform().DisconnectRemote() + + @skipIfWindows + def test_process_connect_sync(self): + """Test the gdb-remote command in synchronous mode""" + try: + self.dbg.SetAsync(False) + self.expect("process connect connect://localhost:%d" % + self.server.port, + substrs=['Process', 'stopped']) + finally: + self.dbg.GetSelectedPlatform().DisconnectRemote() + + @skipIfWindows + @skipIfReproducer # Reproducer don't support async. + def test_process_connect_async(self): + """Test the gdb-remote command in asynchronous mode""" + try: + self.dbg.SetAsync(True) + self.expect("process connect connect://localhost:%d" % + self.server.port, + matching=False, + substrs=['Process', 'stopped']) + lldbutil.expect_state_changes(self, self.dbg.GetListener(), + self.process(), [lldb.eStateStopped]) + finally: + self.dbg.GetSelectedPlatform().DisconnectRemote() diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py index 394c24b4a8805..3c949792983fe 100644 --- a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import lldb import struct diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system2.py b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system2.py index 438538ca922ed..26864bbc6aa34 100644 --- a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system2.py +++ 
b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system2.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import lldb import struct diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/operating_system.py b/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/operating_system.py index ff9a57367a2aa..a91852965f92a 100644 --- a/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/operating_system.py +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/operating_system.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import lldb import struct diff --git a/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py b/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py index 124d13ed97a41..51c0ae75c1a3c 100644 --- a/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py +++ b/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py @@ -38,7 +38,6 @@ def test_setpgid(self): (pid_file_path))) popen = self.spawnSubprocess(exe, [pid_file_path]) - self.addTearDownHook(self.cleanupSubprocesses) pid = lldbutil.wait_for_file_on_target(self, pid_file_path) diff --git a/lldb/test/API/functionalities/reproducers/attach/TestReproducerAttach.py b/lldb/test/API/functionalities/reproducers/attach/TestReproducerAttach.py index b02b170a7e3fa..e6bb9c6a16727 100644 --- a/lldb/test/API/functionalities/reproducers/attach/TestReproducerAttach.py +++ b/lldb/test/API/functionalities/reproducers/attach/TestReproducerAttach.py @@ -37,7 +37,6 @@ def test_reproducer_attach(self): pass self.build(dictionary={'EXE': exe}) - self.addTearDownHook(self.cleanupSubprocesses) inferior = self.spawnSubprocess(self.getBuildArtifact(exe), [token]) pid = inferior.pid diff --git a/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py 
b/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py index 59fb3b6fd3992..8ad9afe32afe9 100644 --- a/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py +++ b/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py @@ -14,29 +14,6 @@ class CreateAfterAttachTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) - @skipIfFreeBSD # Hangs. May be the same as Linux issue llvm.org/pr16229 but - # not yet investigated. Revisit once required functionality - # is implemented for FreeBSD. - # Occasionally hangs on Windows, may be same as other issues. - @skipIfWindows - @skipIfiOSSimulator - @expectedFailureNetBSD - def test_create_after_attach_with_popen(self): - """Test thread creation after process attach.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) - self.create_after_attach(use_fork=False) - - @skipIfFreeBSD # Hangs. Revisit once required functionality is implemented - # for FreeBSD. - @skipIfRemote - @skipIfWindows # Windows doesn't have fork. - @skipIfiOSSimulator - @expectedFailureNetBSD - def test_create_after_attach_with_fork(self): - """Test thread creation after process attach.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) - self.create_after_attach(use_fork=True) - def setUp(self): # Call super's setUp(). TestBase.setUp(self) @@ -45,18 +22,21 @@ def setUp(self): self.break_2 = line_number('main.cpp', '// Set second breakpoint here') self.break_3 = line_number('main.cpp', '// Set third breakpoint here') - def create_after_attach(self, use_fork): + @skipIfFreeBSD # Hangs. May be the same as Linux issue llvm.org/pr16229 but + # not yet investigated. Revisit once required functionality + # is implemented for FreeBSD. + # Occasionally hangs on Windows, may be same as other issues. 
+ @skipIfWindows + @skipIfiOSSimulator + @expectedFailureNetBSD + def test_create_after_attach(self): """Test thread creation after process attach.""" - + self.build(dictionary=self.getBuildFlags(use_cpp11=False)) exe = self.getBuildArtifact("a.out") # Spawn a new process - if use_fork: - pid = self.forkSubprocess(exe) - else: - popen = self.spawnSubprocess(exe) - pid = popen.pid - self.addTearDownHook(self.cleanupSubprocesses) + popen = self.spawnSubprocess(exe) + pid = popen.pid # Attach to the spawned process self.runCmd("process attach -p " + str(pid)) diff --git a/lldb/test/API/lang/cpp/template-specialization-type/Makefile b/lldb/test/API/lang/cpp/template-specialization-type/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/lang/cpp/template-specialization-type/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/template-specialization-type/TestTemplateSpecializationType.py b/lldb/test/API/lang/cpp/template-specialization-type/TestTemplateSpecializationType.py new file mode 100644 index 0000000000000..31f0081dc6977 --- /dev/null +++ b/lldb/test/API/lang/cpp/template-specialization-type/TestTemplateSpecializationType.py @@ -0,0 +1,30 @@ +""" +Test value with a template specialization type. +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TemplateSpecializationTypeTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def test_template_specialization_cast_children(self): + self.build() + lldbutil.run_to_source_breakpoint(self, '// break here', + lldb.SBFileSpec("main.cpp", False)) + + v = self.frame().EvaluateExpression("t") + self.assertEquals(2, v.GetNumChildren()) + self.assertEquals("42", v.GetChildAtIndex(0).GetValue()) + self.assertEquals("21", v.GetChildAtIndex(1).GetValue()) + + # Test a value of the TemplateSpecialization type. 
We turn + # RecordType into TemplateSpecializationType by casting and + # dereferencing a pointer to a record. + v = self.frame().EvaluateExpression("*((TestObj*)&t)") + self.assertEquals(2, v.GetNumChildren()) + self.assertEquals("42", v.GetChildAtIndex(0).GetValue()) + self.assertEquals("21", v.GetChildAtIndex(1).GetValue()) diff --git a/lldb/test/API/lang/cpp/template-specialization-type/main.cpp b/lldb/test/API/lang/cpp/template-specialization-type/main.cpp new file mode 100644 index 0000000000000..5ef9c4962c853 --- /dev/null +++ b/lldb/test/API/lang/cpp/template-specialization-type/main.cpp @@ -0,0 +1,9 @@ +template struct TestObj { + int f; + T g; +}; + +int main() { + TestObj t{42, 21}; + return t.f + t.g; // break here +} diff --git a/lldb/test/API/lang/cpp/typeof/TestTypeOfDeclTypeExpr.py b/lldb/test/API/lang/cpp/typeof/TestTypeOfDeclTypeExpr.py new file mode 100644 index 0000000000000..9c5289c4fa797 --- /dev/null +++ b/lldb/test/API/lang/cpp/typeof/TestTypeOfDeclTypeExpr.py @@ -0,0 +1,14 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test(self): + self.expect_expr("int i; __typeof__(i) j = 1; j", result_type="typeof (i)", result_value="1") + self.expect_expr("int i; typeof(i) j = 1; j", result_type="typeof (i)", result_value="1") + self.expect_expr("int i; decltype(i) j = 1; j", result_type="decltype(i)", result_value="1") diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 632d883e0da9a..a211abe6cc41c 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -124,6 +124,7 @@ def find_python_interpreter(): lldb_repro_mode = lit_config.params.get('lldb-run-with-repro', None) if lldb_repro_mode: lit_config.note("Running API tests in {} mode.".format(lldb_repro_mode)) + mkdir_p(config.lldb_reproducer_directory) if lldb_repro_mode == 
'capture': config.available_features.add('lldb-repro-capture') elif lldb_repro_mode == 'replay': @@ -182,6 +183,9 @@ def find_python_interpreter(): if config.filecheck: dotest_cmd += ['--filecheck', config.filecheck] +if config.yaml2obj: + dotest_cmd += ['--yaml2obj', config.yaml2obj] + if config.lldb_libs_dir: dotest_cmd += ['--lldb-libs-dir', config.lldb_libs_dir] diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index e97f867b265b8..866dc1675e7cf 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -30,6 +30,7 @@ config.test_arch = '@LLDB_TEST_ARCH@' config.test_compiler = '@LLDB_TEST_COMPILER@' config.dsymutil = '@LLDB_TEST_DSYMUTIL@' config.filecheck = '@LLDB_TEST_FILECHECK@' +config.yaml2obj = '@LLDB_TEST_YAML2OBJ@' # The API tests use their own module caches. config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") diff --git a/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/TestBundleWithDotInFilename.py b/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/TestBundleWithDotInFilename.py index 793551259f9aa..2572600a1829f 100644 --- a/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/TestBundleWithDotInFilename.py +++ b/lldb/test/API/macosx/find-dsym/bundle-with-dot-in-filename/TestBundleWithDotInFilename.py @@ -38,7 +38,6 @@ def test_attach_and_check_dsyms(self): self.build() os.chdir(self.getBuildDir()); popen = self.spawnSubprocess(exe) - self.addTearDownHook(self.cleanupSubprocesses) # Give the inferior time to start up, dlopen a bundle, remove the bundle it linked in sleep(5) diff --git a/lldb/test/API/macosx/find-dsym/deep-bundle/TestDeepBundle.py b/lldb/test/API/macosx/find-dsym/deep-bundle/TestDeepBundle.py index 379ff5d0a7ae9..a486c5159f01a 100644 --- a/lldb/test/API/macosx/find-dsym/deep-bundle/TestDeepBundle.py +++ 
b/lldb/test/API/macosx/find-dsym/deep-bundle/TestDeepBundle.py @@ -36,7 +36,6 @@ def test_attach_and_check_dsyms(self): exe = self.getBuildArtifact(exe_name) self.build() popen = self.spawnSubprocess(exe, [self.getBuildDir()]) - self.addTearDownHook(self.cleanupSubprocesses) # Give the inferior time to start up, dlopen a bundle, remove the bundle it linked in sleep(5) diff --git a/lldb/test/API/macosx/function-starts/TestFunctionStarts.py b/lldb/test/API/macosx/function-starts/TestFunctionStarts.py index 141f4e70930a4..0a983436462ae 100644 --- a/lldb/test/API/macosx/function-starts/TestFunctionStarts.py +++ b/lldb/test/API/macosx/function-starts/TestFunctionStarts.py @@ -53,7 +53,6 @@ def do_function_starts(self, in_memory): (pid_file_path))) popen = self.spawnSubprocess(exe, [pid_file_path]) - self.addTearDownHook(self.cleanupSubprocesses) # Wait until process has fully started up. pid = lldbutil.wait_for_file_on_target(self, pid_file_path) diff --git a/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py b/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py index 5346b9904ce50..aff99e3e2804b 100644 --- a/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py +++ b/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py @@ -15,6 +15,7 @@ class TestMacABImacOSFramework(TestBase): @skipIfDarwinEmbedded # There is a Clang driver change missing on llvm.org. 
@expectedFailureAll(bugnumber="rdar://problem/54986190>") + @skipIfReproducer # This is hitting https://bugs.python.org/issue22393 def test_macabi(self): """Test the x86_64-apple-ios-macabi target linked against a macos dylib""" self.build() diff --git a/lldb/test/API/macosx/universal/TestUniversal.py b/lldb/test/API/macosx/universal/TestUniversal.py index 9982edcc77f07..94a056762a2cc 100644 --- a/lldb/test/API/macosx/universal/TestUniversal.py +++ b/lldb/test/API/macosx/universal/TestUniversal.py @@ -137,7 +137,6 @@ def test_process_attach_with_wrong_arch(self): "Our main breakpoint has locations.") popen = self.spawnSubprocess(exe, ["keep_waiting"]) - self.addTearDownHook(self.cleanupSubprocesses) error = lldb.SBError() empty_listener = lldb.SBListener() diff --git a/lldb/test/API/python_api/hello_world/TestHelloWorld.py b/lldb/test/API/python_api/hello_world/TestHelloWorld.py index c1155cf298145..d52b0087a8e6d 100644 --- a/lldb/test/API/python_api/hello_world/TestHelloWorld.py +++ b/lldb/test/API/python_api/hello_world/TestHelloWorld.py @@ -91,7 +91,6 @@ def test_with_attach_to_process_with_id_api(self): if os.path.exists(token): os.remove(token) popen = self.spawnSubprocess(self.getBuildArtifact(exe), [token]) - self.addTearDownHook(self.cleanupSubprocesses) lldbutil.wait_for_file_on_target(self, token) listener = lldb.SBListener("my.attach.listener") @@ -126,7 +125,6 @@ def test_with_attach_to_process_with_name_api(self): if os.path.exists(token): os.remove(token) popen = self.spawnSubprocess(self.getBuildArtifact(exe), [token]) - self.addTearDownHook(self.cleanupSubprocesses) lldbutil.wait_for_file_on_target(self, token) listener = lldb.SBListener("my.attach.listener") diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py index 48f919aa94b90..bab02e7cfc1df 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteKill.py @@ -31,7 +31,7 
@@ def attach_commandline_kill_after_initial_stop(self): # Wait a moment for completed and now-detached inferior process to # clear. - time.sleep(self._WAIT_TIMEOUT) + time.sleep(self.DEFAULT_SLEEP) if not lldb.remote_platform: # Process should be dead now. Reap results. diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py index bc793a36e9985..5ee32a5d18ccf 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteProcessInfo.py @@ -52,7 +52,7 @@ def attach_commandline_qProcessInfo_reports_correct_pid(self): self.add_process_info_collection_packets() # Run the stream - context = self.expect_gdbremote_sequence(timeout_seconds=self._DEFAULT_TIMEOUT) + context = self.expect_gdbremote_sequence() self.assertIsNotNone(context) # Gather process info response diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py index 951932863409e..891a0101614a6 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteThreadsInStopReply.py @@ -42,7 +42,7 @@ def gather_stop_reply_fields(self, post_startup_log_lines, thread_count, hw_info = self.parse_hw_info(context) # Give threads time to start up, then break. - time.sleep(self._WAIT_TIMEOUT) + time.sleep(self.DEFAULT_SLEEP) self.reset_test_sequence() self.test_sequence.add_log_lines( [ @@ -60,8 +60,7 @@ def gather_stop_reply_fields(self, post_startup_log_lines, thread_count, self.assertIsNotNone(context) # Wait until all threads have started. 
- threads = self.wait_for_thread_count(thread_count, - timeout_seconds=self._WAIT_TIMEOUT) + threads = self.wait_for_thread_count(thread_count) self.assertIsNotNone(threads) self.assertEqual(len(threads), thread_count) diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py index 51dd0cb1a3bd0..d7fd97e693c67 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py @@ -33,7 +33,7 @@ def gather_stop_replies_via_qThreadStopInfo(self, thread_count): self.assertIsNotNone(context) # Give threads time to start up, then break. - time.sleep(self._WAIT_TIMEOUT) + time.sleep(self.DEFAULT_SLEEP) self.reset_test_sequence() self.test_sequence.add_log_lines( [ @@ -51,8 +51,7 @@ def gather_stop_replies_via_qThreadStopInfo(self, thread_count): self.assertIsNotNone(context) # Wait until all threads have started. - threads = self.wait_for_thread_count(thread_count, - timeout_seconds=self._WAIT_TIMEOUT) + threads = self.wait_for_thread_count(thread_count) self.assertIsNotNone(threads) # On Windows, there could be more threads spawned. For example, DebugBreakProcess will diff --git a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py index d46123e337c80..154f8b629dcc4 100644 --- a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py @@ -642,7 +642,7 @@ def Hg_switches_to_3_threads(self): self.run_process_then_stop(run_seconds=1) # Wait at most x seconds for 3 threads to be present. - threads = self.wait_for_thread_count(3, timeout_seconds=self._WAIT_TIMEOUT) + threads = self.wait_for_thread_count(3) self.assertEqual(len(threads), 3) # verify we can $H to each thead, and $qC matches the thread we set. 
@@ -723,7 +723,7 @@ def Hc_then_Csignal_signals_correct_thread(self, segfault_signo): # context = self.run_process_then_stop(run_seconds=1) # Wait at most x seconds for all threads to be present. - # threads = self.wait_for_thread_count(NUM_THREADS, timeout_seconds=5) + # threads = self.wait_for_thread_count(NUM_THREADS) # self.assertEquals(len(threads), NUM_THREADS) signaled_tids = {} @@ -739,7 +739,7 @@ def Hc_then_Csignal_signals_correct_thread(self, segfault_signo): 2: "thread_id"}}], True) - context = self.expect_gdbremote_sequence(timeout_seconds=self._DEFAULT_TIMEOUT) + context = self.expect_gdbremote_sequence() self.assertIsNotNone(context) signo = context.get("signo") self.assertEqual(int(signo, 16), segfault_signo) @@ -775,8 +775,7 @@ def Hc_then_Csignal_signals_correct_thread(self, segfault_signo): True) # Run the sequence. - context = self.expect_gdbremote_sequence( - timeout_seconds=self._DEFAULT_TIMEOUT) + context = self.expect_gdbremote_sequence() self.assertIsNotNone(context) # Ensure the stop signal is the signal we delivered. @@ -1491,7 +1490,7 @@ def P_and_p_thread_suffix_work(self): self.assertIsNotNone(context) # Wait for 3 threads to be present. - threads = self.wait_for_thread_count(3, timeout_seconds=self._WAIT_TIMEOUT) + threads = self.wait_for_thread_count(3) self.assertEqual(len(threads), 3) expected_reg_values = [] diff --git a/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py b/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py index 664b6001d8dae..a3250ab4f1bfb 100644 --- a/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py +++ b/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py @@ -14,8 +14,6 @@ class TestStubReverseConnect(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - _DEFAULT_TIMEOUT = 20 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) - def setUp(self): # Set up the test. 
gdbremote_testcase.GdbRemoteTestCaseBase.setUp(self) @@ -25,11 +23,11 @@ def setUp(self): self.assertIsNotNone(self.listener_socket) self.listener_port = self.listener_socket.getsockname()[1] - def create_listener_socket(self, timeout_seconds=_DEFAULT_TIMEOUT): + def create_listener_socket(self): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.assertIsNotNone(sock) - sock.settimeout(timeout_seconds) + sock.settimeout(self.DEFAULT_TIMEOUT) sock.bind(("127.0.0.1", 0)) sock.listen(1) @@ -77,7 +75,7 @@ def reverse_connect_works(self): address, stub_socket.getsockname())) # Verify we can do the handshake. If that works, we'll call it good. - self.do_handshake(stub_socket, timeout_seconds=self._DEFAULT_TIMEOUT) + self.do_handshake(stub_socket) # Clean up. stub_socket.shutdown(socket.SHUT_RDWR) diff --git a/lldb/test/API/tools/lldb-server/platform-process-connect/TestPlatformProcessConnect.py b/lldb/test/API/tools/lldb-server/platform-process-connect/TestPlatformProcessConnect.py index a9847c66ca12b..c9331e7d09a58 100644 --- a/lldb/test/API/tools/lldb-server/platform-process-connect/TestPlatformProcessConnect.py +++ b/lldb/test/API/tools/lldb-server/platform-process-connect/TestPlatformProcessConnect.py @@ -54,7 +54,6 @@ def test_platform_process_connect(self): self.debug_monitor_exe, commandline_args, install_remote=False) - self.addTearDownHook(self.cleanupSubprocesses) socket_id = lldbutil.wait_for_file_on_target(self, port_file) diff --git a/lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py b/lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py index e49c9267d971a..7955b6a97b04e 100644 --- a/lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py +++ b/lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py @@ -90,7 +90,6 @@ def cleanup(): self.addTearDownHook(cleanup) popen = self.spawnSubprocess(program, [pid_file_path]) - self.addTearDownHook(self.cleanupSubprocesses) pid = lldbutil.wait_for_file_on_target(self, pid_file_path) 
diff --git a/lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py b/lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py index fb7d71872a16d..b63eb6e7201c1 100644 --- a/lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py +++ b/lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py @@ -431,7 +431,7 @@ def test_extra_launch_commands(self): @skipIfWindows @skipIfNetBSD # Hangs on NetBSD as well @skipIfDarwin - @skipIf(archs="aarch64") # Example of a flaky run http://lab.llvm.org:8011/builders/lldb-aarch64-ubuntu/builds/5540/steps/test/logs/stdio + @skipIf(archs=["arm", "aarch64"]) # Example of a flaky run http://lab.llvm.org:8011/builders/lldb-aarch64-ubuntu/builds/5540/steps/test/logs/stdio def test_terminate_commands(self): ''' Tests that the "terminateCommands", that can be passed during diff --git a/lldb/test/API/tools/lldb-vscode/module/Makefile b/lldb/test/API/tools/lldb-vscode/module/Makefile new file mode 100644 index 0000000000000..1fb944b138937 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/Makefile @@ -0,0 +1,13 @@ +DYLIB_NAME := foo +DYLIB_CXX_SOURCES := foo.cpp +CXX_SOURCES := main.cpp + +all: a.out.stripped + +include Makefile.rules + +a.out.stripped: a.out.dSYM + strip -o a.out.stripped a.out +ifneq "$(CODESIGN)" "" + $(CODESIGN) -fs - a.out.stripped +endif diff --git a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py new file mode 100644 index 0000000000000..40c4145b38e36 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py @@ -0,0 +1,77 @@ +""" +Test lldb-vscode setBreakpoints request +""" + +from __future__ import print_function + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase + +class TestVSCode_module(lldbvscode_testcase.VSCodeTestCaseBase): + + mydir = 
TestBase.compute_mydir(__file__) + + + @skipIfWindows + @skipUnlessDarwin + @skipIfRemote + def test_modules_event(self): + program_basename = "a.out.stripped" + program= self.getBuildArtifact(program_basename) + self.build_and_launch(program) + functions = ['foo'] + breakpoint_ids = self.set_function_breakpoints(functions) + self.assertEquals(len(breakpoint_ids), len(functions), + 'expect one breakpoint') + self.continue_to_breakpoints(breakpoint_ids) + active_modules = self.vscode.get_active_modules() + self.assertIn(program_basename, active_modules, '%s module is in active modules' % (program_basename)) + program_module = active_modules[program_basename] + self.assertIn('name', program_module, 'make sure name is in module') + self.assertEqual(program_basename, program_module['name']) + self.assertIn('path', program_module, 'make sure path is in module') + self.assertEqual(program, program_module['path']) + self.assertTrue('symbolFilePath' not in program_module, 'Make sure a.out.stripped has no debug info') + self.assertEqual('Symbols not found.', program_module['symbolStatus']) + symbol_path = self.getBuildArtifact("a.out") + self.vscode.request_evaluate('`%s' % ('target symbols add -s "%s" "%s"' % (program, symbol_path))) + + def checkSymbolsLoaded(): + active_modules = self.vscode.get_active_modules() + program_module = active_modules[program_basename] + return 'Symbols loaded.' 
== program_module['symbolStatus'] + self.waitUntil(checkSymbolsLoaded) + + active_modules = self.vscode.get_active_modules() + program_module = active_modules[program_basename] + self.assertEqual(program_basename, program_module['name']) + self.assertEqual(program, program_module['path']) + self.assertEqual('Symbols loaded.', program_module['symbolStatus']) + self.assertIn('symbolFilePath', program_module) + self.assertEqual(symbol_path, program_module['symbolFilePath']) + self.assertIn('addressRange', program_module) + + @skipIfWindows + @skipUnlessDarwin + @skipIfRemote + def test_compile_units(self): + program= self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.cpp" + main_source_path = self.getSourcePath(source) + breakpoint1_line = line_number(source, '// breakpoint 1') + lines = [breakpoint1_line] + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.continue_to_breakpoints(breakpoint_ids) + moduleId = self.vscode.get_active_modules()['a.out']['id'] + response = self.vscode.request_getCompileUnits(moduleId) + self.assertTrue(response['body']) + self.assertTrue(len(response['body']['compileUnits']) == 1, + 'Only one source file should exist') + self.assertTrue(response['body']['compileUnits'][0]['compileUnitPath'] == main_source_path, + 'Real path to main.cpp matches') + diff --git a/lldb/test/API/tools/lldb-vscode/module/foo.cpp b/lldb/test/API/tools/lldb-vscode/module/foo.cpp new file mode 100644 index 0000000000000..9dba85a9cccab --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/foo.cpp @@ -0,0 +1,3 @@ +int foo() { + return 12; +} diff --git a/lldb/test/API/tools/lldb-vscode/module/foo.h b/lldb/test/API/tools/lldb-vscode/module/foo.h new file mode 100644 index 0000000000000..5d5f8f0c9e786 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/foo.h @@ -0,0 +1 @@ +int foo(); diff --git a/lldb/test/API/tools/lldb-vscode/module/main.cpp b/lldb/test/API/tools/lldb-vscode/module/main.cpp new file mode 
100644 index 0000000000000..4ff2b2360eb97 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/main.cpp @@ -0,0 +1,6 @@ +#include "foo.h" + +int main(int argc, char const *argv[]) { + foo(); + return 0; // breakpoint 1 +} diff --git a/lldb/test/API/use_lldb_suite.py b/lldb/test/API/use_lldb_suite.py index 6a8c12d81898c..f1edf1d7304f1 100644 --- a/lldb/test/API/use_lldb_suite.py +++ b/lldb/test/API/use_lldb_suite.py @@ -8,21 +8,21 @@ def find_lldb_root(): os.path.abspath(inspect.getfile(inspect.currentframe())) ) while True: - lldb_root = os.path.dirname(lldb_root) - if lldb_root is None: - return None + parent = os.path.dirname(lldb_root) + if parent == lldb_root: # dirname('/') == '/' + raise Exception("use_lldb_suite_root.py not found") + lldb_root = parent test_path = os.path.join(lldb_root, "use_lldb_suite_root.py") if os.path.isfile(test_path): return lldb_root - return None lldb_root = find_lldb_root() -if lldb_root is not None: - import imp - fp, pathname, desc = imp.find_module("use_lldb_suite_root", [lldb_root]) - try: - imp.load_module("use_lldb_suite_root", fp, pathname, desc) - finally: - if fp: - fp.close() + +import imp +fp, pathname, desc = imp.find_module("use_lldb_suite_root", [lldb_root]) +try: + imp.load_module("use_lldb_suite_root", fp, pathname, desc) +finally: + if fp: + fp.close() diff --git a/lldb/test/Shell/Expr/TestTypeOfDeclTypeExpr.test b/lldb/test/Shell/Expr/TestTypeOfDeclTypeExpr.test deleted file mode 100644 index c156ae556a714..0000000000000 --- a/lldb/test/Shell/Expr/TestTypeOfDeclTypeExpr.test +++ /dev/null @@ -1,13 +0,0 @@ -# RUN: %lldb -b -s %s | FileCheck %s - -expression int i; __typeof__(i) j = 1; j -# CHECK: (lldb) expression int i; __typeof__(i) j = 1; j -# CHECK-NEXT: (typeof (i)) {{.*}} = 1 - -expression int i; typeof(i) j = 1; j -# CHECK: (lldb) expression int i; typeof(i) j = 1; j -# CHECK-NEXT: (typeof (i)) {{.*}} = 1 - -expression int i; decltype(i) j = 1; j -# CHECK: (lldb) expression int i; decltype(i) j = 1; j 
-# CHECK-NEXT: (decltype(i)) {{.*}} = 1 diff --git a/lldb/test/Shell/SymbolFile/DWARF/debug_line-tombstone.s b/lldb/test/Shell/SymbolFile/DWARF/debug_line-tombstone.s new file mode 100644 index 0000000000000..53600ac5f4b1b --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/debug_line-tombstone.s @@ -0,0 +1,106 @@ +# This test that we don't get confused by line tables containing a tombstone +# (-1) value, as produced by recent lld's. Line sequences with the tombstone +# value should be completely ignored. The tombstone sequence is deliberately +# longer so that any attempt at an address binary search will likely land inside +# the sequence. + +# RUN: llvm-mc --filetype=obj --triple=x86_64-pc-linux %s -o %t +# RUN: %lldb -o "image lookup -n main -v" -o "image dump line-table main.cpp" \ +# RUN: -o exit %t | FileCheck %s + +# CHECK-LABEL: image lookup -n main -v +# CHECK: LineEntry: [0x0000000000001000-0x0000000000001001): main.cpp:1 +# CHECK-LABEL: image dump line-table main.cpp +# CHECK-NEXT: Line table for main.cpp +# CHECK-NEXT: 0x0000000000001000: main.cpp:1 +# CHECK-NEXT: 0x0000000000001001: main.cpp:1 +# CHECK-EMPTY: +# CHECK-NEXT: exit + + .text +.space 0x1000 +main: + nop +.Lmain_end: + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0xc4 DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .asciz "main.cpp" # DW_AT_name + .long 0 # DW_AT_stmt_list + .quad main-.text # DW_AT_low_pc + .long .Lmain_end-main # DW_AT_high_pc +.Ldebug_info_end0: + +.section .debug_line,"",@progbits + .long .Llt1_end - .Llt1_start # Length of Unit (DWARF-32 format) +.Llt1_start: + .short 4 # DWARF version number + .long .Lprologue1_end-.Lprologue1_start # Length of Prologue +.Lprologue1_start: + .byte 1 # Minimum Instruction Length + .byte 1 # Maximum Operations per Instruction + .byte 1 # Default is_stmt + .byte -5 # Line Base + .byte 14 # Line Range + .byte 13 # Opcode Base + .byte 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 # Standard Opcode Lengths + .byte 0 + .asciz "main.cpp" # File table + .byte 0, 0, 0 + .byte 0 +.Lprologue1_end: + .byte 0, 9, 2 # DW_LNE_set_address + .quad -1 + .byte 1 # DW_LNS_copy + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 33 # address += 1, line += 1 + .byte 2 # DW_LNS_advance_pc + .uleb128 1 + .byte 0, 1, 1 # DW_LNE_end_sequence + + .byte 0, 9, 2 # DW_LNE_set_address + .quad main-.text + .byte 18 # address += 0, line += 0 + .byte 2 # DW_LNS_advance_pc + .uleb128 1 + .byte 0, 1, 1 # DW_LNE_end_sequence +.Llt1_end: + diff --git a/lldb/test/Shell/helper/build.py b/lldb/test/Shell/helper/build.py index 3de2f33503185..5689373d37a51 100755 --- a/lldb/test/Shell/helper/build.py +++ b/lldb/test/Shell/helper/build.py @@ -1,4 +1,4 @@ -#! 
/usr/bin/env python +#!/usr/bin/env python from __future__ import print_function diff --git a/lldb/third_party/Python/module/progress/progress.py b/lldb/third_party/Python/module/progress/progress.py index 3397cf0430176..e4bd9d5fd5b4c 100644 --- a/lldb/third_party/Python/module/progress/progress.py +++ b/lldb/third_party/Python/module/progress/progress.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import print_function diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index 9a7e2eb9a1a0f..b29b3ddc3056c 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -41,7 +41,7 @@ function(get_debugserver_codesign_identity result) return() endif() - message(WARNING "Development code sign identiy not found: 'lldb_codesign' ${not_found_help}") + message(WARNING "Development code sign identity not found: 'lldb_codesign' ${not_found_help}") # LLVM pendant: fallback if available if(LLVM_CODESIGNING_IDENTITY) diff --git a/lldb/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h b/lldb/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h index 43594e890f5f2..a702ea52e8536 100644 --- a/lldb/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h +++ b/lldb/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h @@ -31,38 +31,38 @@ class DNBArchImplI386 : public DNBArchProtocol { static void Initialize(); - virtual bool GetRegisterValue(uint32_t set, uint32_t reg, - DNBRegisterValue *value); - virtual bool SetRegisterValue(uint32_t set, uint32_t reg, - const DNBRegisterValue *value); - virtual nub_size_t GetRegisterContext(void *buf, nub_size_t buf_len); - virtual nub_size_t SetRegisterContext(const void *buf, nub_size_t buf_len); - virtual uint32_t SaveRegisterState(); - virtual bool RestoreRegisterState(uint32_t save_id); - - virtual kern_return_t GetRegisterState(int set, bool force); - virtual kern_return_t SetRegisterState(int set); - 
virtual bool RegisterSetStateIsValid(int set) const; - - virtual uint64_t GetPC(uint64_t failValue); // Get program counter - virtual kern_return_t SetPC(uint64_t value); - virtual uint64_t GetSP(uint64_t failValue); // Get stack pointer - virtual void ThreadWillResume(); - virtual bool ThreadDidStop(); - virtual bool NotifyException(MachException::Data &exc); - - virtual uint32_t NumSupportedHardwareBreakpoints(); - virtual uint32_t NumSupportedHardwareWatchpoints(); - virtual uint32_t EnableHardwareBreakpoint(nub_addr_t addr, nub_size_t size, - bool also_set_on_task); - virtual bool DisableHardwareBreakpoint(uint32_t hw_index, - bool also_set_on_task); - virtual uint32_t EnableHardwareWatchpoint(nub_addr_t addr, nub_size_t size, - bool read, bool write, - bool also_set_on_task); - virtual bool DisableHardwareWatchpoint(uint32_t hw_break_index, - bool also_set_on_task); - virtual uint32_t GetHardwareWatchpointHit(nub_addr_t &addr); + bool GetRegisterValue(uint32_t set, uint32_t reg, + DNBRegisterValue *value) override; + bool SetRegisterValue(uint32_t set, uint32_t reg, + const DNBRegisterValue *value) override; + nub_size_t GetRegisterContext(void *buf, nub_size_t buf_len) override; + nub_size_t SetRegisterContext(const void *buf, nub_size_t buf_len) override; + uint32_t SaveRegisterState() override; + bool RestoreRegisterState(uint32_t save_id) override; + + kern_return_t GetRegisterState(int set, bool force) override; + kern_return_t SetRegisterState(int set) override; + bool RegisterSetStateIsValid(int set) const override; + + uint64_t GetPC(uint64_t failValue) override; // Get program counter + kern_return_t SetPC(uint64_t value) override; + uint64_t GetSP(uint64_t failValue) override; // Get stack pointer + void ThreadWillResume() override; + bool ThreadDidStop() override; + bool NotifyException(MachException::Data &exc) override; + + uint32_t NumSupportedHardwareBreakpoints() override; + uint32_t NumSupportedHardwareWatchpoints() override; + uint32_t 
EnableHardwareBreakpoint(nub_addr_t addr, nub_size_t size, + bool also_set_on_task) override; + bool DisableHardwareBreakpoint(uint32_t hw_index, + bool also_set_on_task) override; + uint32_t EnableHardwareWatchpoint(nub_addr_t addr, nub_size_t size, + bool read, bool write, + bool also_set_on_task) override; + bool DisableHardwareWatchpoint(uint32_t hw_break_index, + bool also_set_on_task) override; + uint32_t GetHardwareWatchpointHit(nub_addr_t &addr) override; protected: kern_return_t EnableHardwareSingleStep(bool enable); @@ -228,9 +228,9 @@ class DNBArchImplI386 : public DNBArchProtocol { static bool IsWatchpointHit(const DBG &debug_state, uint32_t hw_index); static nub_addr_t GetWatchAddress(const DBG &debug_state, uint32_t hw_index); - virtual bool StartTransForHWP(); - virtual bool RollbackTransForHWP(); - virtual bool FinishTransForHWP(); + bool StartTransForHWP() override; + bool RollbackTransForHWP() override; + bool FinishTransForHWP() override; DBG GetDBGCheckpoint(); MachThread *m_thread; diff --git a/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h b/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h index 0ed433f7f3a2d..96da02a4c9ff9 100644 --- a/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h +++ b/lldb/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h @@ -30,39 +30,39 @@ class DNBArchImplX86_64 : public DNBArchProtocol { static void Initialize(); - virtual bool GetRegisterValue(uint32_t set, uint32_t reg, - DNBRegisterValue *value); - virtual bool SetRegisterValue(uint32_t set, uint32_t reg, - const DNBRegisterValue *value); - virtual nub_size_t GetRegisterContext(void *buf, nub_size_t buf_len); - virtual nub_size_t SetRegisterContext(const void *buf, nub_size_t buf_len); - virtual uint32_t SaveRegisterState(); - virtual bool RestoreRegisterState(uint32_t save_id); - - virtual kern_return_t GetRegisterState(int set, bool force); - virtual kern_return_t SetRegisterState(int set); - virtual bool 
RegisterSetStateIsValid(int set) const; - - virtual uint64_t GetPC(uint64_t failValue); // Get program counter - virtual kern_return_t SetPC(uint64_t value); - virtual uint64_t GetSP(uint64_t failValue); // Get stack pointer - virtual void ThreadWillResume(); - virtual bool ThreadDidStop(); - virtual bool NotifyException(MachException::Data &exc); - - virtual uint32_t NumSupportedHardwareBreakpoints(); - virtual uint32_t NumSupportedHardwareWatchpoints(); - - virtual uint32_t EnableHardwareBreakpoint(nub_addr_t addr, nub_size_t size, - bool also_set_on_task); - virtual bool DisableHardwareBreakpoint(uint32_t hw_break_index, - bool also_set_on_task); - virtual uint32_t EnableHardwareWatchpoint(nub_addr_t addr, nub_size_t size, - bool read, bool write, - bool also_set_on_task); - virtual bool DisableHardwareWatchpoint(uint32_t hw_break_index, - bool also_set_on_task); - virtual uint32_t GetHardwareWatchpointHit(nub_addr_t &addr); + bool GetRegisterValue(uint32_t set, uint32_t reg, + DNBRegisterValue *value) override; + bool SetRegisterValue(uint32_t set, uint32_t reg, + const DNBRegisterValue *value) override; + nub_size_t GetRegisterContext(void *buf, nub_size_t buf_len) override; + nub_size_t SetRegisterContext(const void *buf, nub_size_t buf_len) override; + uint32_t SaveRegisterState() override; + bool RestoreRegisterState(uint32_t save_id) override; + + kern_return_t GetRegisterState(int set, bool force) override; + kern_return_t SetRegisterState(int set) override; + bool RegisterSetStateIsValid(int set) const override; + + uint64_t GetPC(uint64_t failValue) override; // Get program counter + kern_return_t SetPC(uint64_t value) override; + uint64_t GetSP(uint64_t failValue) override; // Get stack pointer + void ThreadWillResume() override; + bool ThreadDidStop() override; + bool NotifyException(MachException::Data &exc) override; + + uint32_t NumSupportedHardwareBreakpoints() override; + uint32_t NumSupportedHardwareWatchpoints() override; + + uint32_t 
EnableHardwareBreakpoint(nub_addr_t addr, nub_size_t size, + bool also_set_on_task) override; + bool DisableHardwareBreakpoint(uint32_t hw_break_index, + bool also_set_on_task) override; + uint32_t EnableHardwareWatchpoint(nub_addr_t addr, nub_size_t size, + bool read, bool write, + bool also_set_on_task) override; + bool DisableHardwareWatchpoint(uint32_t hw_break_index, + bool also_set_on_task) override; + uint32_t GetHardwareWatchpointHit(nub_addr_t &addr) override; protected: kern_return_t EnableHardwareSingleStep(bool enable); @@ -232,9 +232,9 @@ class DNBArchImplX86_64 : public DNBArchProtocol { static bool IsWatchpointHit(const DBG &debug_state, uint32_t hw_index); static nub_addr_t GetWatchAddress(const DBG &debug_state, uint32_t hw_index); - virtual bool StartTransForHWP(); - virtual bool RollbackTransForHWP(); - virtual bool FinishTransForHWP(); + bool StartTransForHWP() override; + bool RollbackTransForHWP() override; + bool FinishTransForHWP() override; DBG GetDBGCheckpoint(); MachThread *m_thread; diff --git a/lldb/tools/intel-features/CMakeLists.txt b/lldb/tools/intel-features/CMakeLists.txt index efba2f74904f7..e5f3bbfaf11a7 100644 --- a/lldb/tools/intel-features/CMakeLists.txt +++ b/lldb/tools/intel-features/CMakeLists.txt @@ -56,7 +56,7 @@ add_lldb_library(lldbIntelFeatures SHARED LINK_LIBS ${FEATURE_LIBS} - ${PYTHON_LIBRARY} + ${PYTHON_LIBRARIES} ) # Add link dependencies for python wrapper diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 8fcf179b29aad..1ebaa5c377121 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -327,6 +327,42 @@ llvm::json::Value CreateBreakpoint(lldb::SBBreakpoint &bp, return llvm::json::Value(std::move(object)); } +llvm::json::Value CreateModule(lldb::SBModule &module) { + llvm::json::Object object; + if (!module.IsValid()) + return llvm::json::Value(std::move(object)); + const char *uuid = module.GetUUIDString(); + 
object.try_emplace("id", uuid ? std::string(uuid) : std::string("")); + object.try_emplace("name", std::string(module.GetFileSpec().GetFilename())); + char module_path_arr[PATH_MAX]; + module.GetFileSpec().GetPath(module_path_arr, sizeof(module_path_arr)); + std::string module_path(module_path_arr); + object.try_emplace("path", module_path); + if (module.GetNumCompileUnits() > 0) { + object.try_emplace("symbolStatus", "Symbols loaded."); + char symbol_path_arr[PATH_MAX]; + module.GetSymbolFileSpec().GetPath(symbol_path_arr, sizeof(symbol_path_arr)); + std::string symbol_path(symbol_path_arr); + object.try_emplace("symbolFilePath", symbol_path); + } else { + object.try_emplace("symbolStatus", "Symbols not found."); + } + std::string loaded_addr = std::to_string( + module.GetObjectFileHeaderAddress().GetLoadAddress(g_vsc.target)); + object.try_emplace("addressRange", loaded_addr); + std::string version_str; + uint32_t version_nums[3]; + uint32_t num_versions = module.GetVersion(version_nums, sizeof(version_nums)/sizeof(uint32_t)); + for (uint32_t i=0; i request_path, llvm::Optional request_line) { @@ -902,4 +938,13 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, return llvm::json::Value(std::move(object)); } +llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { + llvm::json::Object object; + char unit_path_arr[PATH_MAX]; + unit.GetFileSpec().GetPath(unit_path_arr, sizeof(unit_path_arr)); + std::string unit_path(unit_path_arr); + object.try_emplace("compileUnitPath", unit_path); + return llvm::json::Value(std::move(object)); +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index af76683d11cc8..e2ccfdb1fb2b6 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include "VSCodeForward.h" +#include "lldb/API/SBModule.h" namespace lldb_vscode { @@ 
-237,6 +238,16 @@ CreateBreakpoint(lldb::SBBreakpoint &bp, llvm::Optional request_path = llvm::None, llvm::Optional request_line = llvm::None); +/// Converts a LLDB module to a VS Code DAP module for use in "modules" events. +/// +/// \param[in] module +/// A LLDB module object to convert into a JSON value +/// +/// \return +/// A "Module" JSON object with that follows the formal JSON +/// definition outlined by Microsoft. +llvm::json::Value CreateModule(lldb::SBModule &module); + /// Create a "Event" JSON object using \a event_name as the event name /// /// \param[in] event_name @@ -430,6 +441,8 @@ llvm::json::Value CreateThreadStopped(lldb::SBThread &thread, uint32_t stop_id); llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, int64_t varID, bool format_hex); +llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); + } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index b2d16f96d1f2b..4a30aef3a6db4 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -358,6 +358,11 @@ void VSCode::SetTarget(const lldb::SBTarget target) { lldb::SBTarget::eBroadcastBitBreakpointChanged); listener.StartListeningForEvents(this->broadcaster, eBroadcastBitStopEventThread); + listener.StartListeningForEvents( + this->target.GetBroadcaster(), + lldb::SBTarget::eBroadcastBitModulesLoaded | + lldb::SBTarget::eBroadcastBitModulesUnloaded | + lldb::SBTarget::eBroadcastBitSymbolsLoaded); } } diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 168873f827527..27ee832677d72 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include "llvm/ADT/ArrayRef.h" #include "llvm/Option/Arg.h" @@ -434,6 +435,30 @@ void EventThreadFunction() { g_vsc.SendJSON(llvm::json::Value(std::move(bp_event))); } } + } else if 
(lldb::SBTarget::EventIsTargetEvent(event)) { + if (event_mask & lldb::SBTarget::eBroadcastBitModulesLoaded || + event_mask & lldb::SBTarget::eBroadcastBitModulesUnloaded || + event_mask & lldb::SBTarget::eBroadcastBitSymbolsLoaded) { + int num_modules = lldb::SBTarget::GetNumModulesFromEvent(event); + for (int i = 0; i < num_modules; i++) { + auto module = lldb::SBTarget::GetModuleAtIndexFromEvent(i, event); + auto module_event = CreateEventObject("module"); + llvm::json::Value module_value = CreateModule(module); + llvm::json::Object body; + if (event_mask & lldb::SBTarget::eBroadcastBitModulesLoaded) { + body.try_emplace("reason", "new"); + } else if (event_mask & + lldb::SBTarget::eBroadcastBitModulesUnloaded) { + body.try_emplace("reason", "removed"); + } else if (event_mask & + lldb::SBTarget::eBroadcastBitSymbolsLoaded) { + body.try_emplace("reason", "changed"); + } + body.try_emplace("module", module_value); + module_event.try_emplace("body", std::move(body)); + g_vsc.SendJSON(llvm::json::Value(std::move(module_event))); + } + } } else if (event.BroadcasterMatchesRef(g_vsc.broadcaster)) { if (event_mask & eBroadcastBitStopEventThread) { done = true; @@ -1149,6 +1174,72 @@ void request_evaluate(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } +// "getCompileUnitsRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "Compile Unit request; value of command field is +// 'getCompileUnits'.", +// "properties": { +// "command": { +// "type": "string", +// "enum": [ "getCompileUnits" ] +// }, +// "arguments": { +// "$ref": "#/definitions/getCompileUnitRequestArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "getCompileUnitsRequestArguments": { +// "type": "object", +// "description": "Arguments for 'getCompileUnits' request.", +// "properties": { +// "moduleId": { +// "type": "string", +// "description": "The ID of the module." 
+// } +// }, +// "required": [ "moduleId" ] +// }, +// "getCompileUnitsResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to 'getCompileUnits' request.", +// "properties": { +// "body": { +// "description": "Response to 'getCompileUnits' request. Array of +// paths of compile units." +// } +// } +// }] +// } + +void request_getCompileUnits(const llvm::json::Object &request) { + llvm::json::Object response; + FillResponse(request, response); + lldb::SBProcess process = g_vsc.target.GetProcess(); + llvm::json::Object body; + llvm::json::Array units; + auto arguments = request.getObject("arguments"); + std::string module_id = std::string(GetString(arguments, "moduleId")); + int num_modules = g_vsc.target.GetNumModules(); + for (int i = 0; i < num_modules; i++) { + auto curr_module = g_vsc.target.GetModuleAtIndex(i); + if (module_id == curr_module.GetUUIDString()) { + int num_units = curr_module.GetNumCompileUnits(); + for (int j = 0; j < num_units; j++) { + auto curr_unit = curr_module.GetCompileUnitAtIndex(j);\ + units.emplace_back(CreateCompileUnit(curr_unit));\ + } + body.try_emplace("compileUnits", std::move(units)); + break; + } + } + response.try_emplace("body", std::move(body)); + g_vsc.SendJSON(llvm::json::Value(std::move(response))); +} + // "InitializeRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -2734,6 +2825,7 @@ const std::map &GetRequestHandlers() { REQUEST_CALLBACK(disconnect), REQUEST_CALLBACK(evaluate), REQUEST_CALLBACK(exceptionInfo), + REQUEST_CALLBACK(getCompileUnits), REQUEST_CALLBACK(initialize), REQUEST_CALLBACK(launch), REQUEST_CALLBACK(next), diff --git a/lldb/unittests/Core/CMakeLists.txt b/lldb/unittests/Core/CMakeLists.txt index a2cc5a7f1f6d5..de99856486f1e 100644 --- a/lldb/unittests/Core/CMakeLists.txt +++ b/lldb/unittests/Core/CMakeLists.txt @@ -1,6 +1,7 @@ add_lldb_unittest(LLDBCoreTests CommunicationTest.cpp MangledTest.cpp + 
ModuleSpecTest.cpp RichManglingContextTest.cpp SourceManagerTest.cpp StreamCallbackTest.cpp @@ -11,6 +12,8 @@ add_lldb_unittest(LLDBCoreTests lldbHost lldbSymbol lldbPluginObjectFileELF + lldbPluginObjectFileMachO + lldbPluginObjectFilePECOFF lldbPluginSymbolFileSymtab lldbUtilityHelpers LLVMTestingSupport diff --git a/lldb/unittests/Core/MangledTest.cpp b/lldb/unittests/Core/MangledTest.cpp index 5e667d1ada8c0..6e1bdd59978d8 100644 --- a/lldb/unittests/Core/MangledTest.cpp +++ b/lldb/unittests/Core/MangledTest.cpp @@ -165,8 +165,7 @@ TEST(MangledTest, NameIndexes_FindFunctionSymbols) { )"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); - ModuleSpec Spec{FileSpec(ExpectedFile->name())}; - auto M = std::make_shared(Spec); + auto M = std::make_shared(ExpectedFile->moduleSpec()); auto Count = [M](const char *Name, FunctionNameType Type) -> int { SymbolContextList SymList; diff --git a/lldb/unittests/Core/ModuleSpecTest.cpp b/lldb/unittests/Core/ModuleSpecTest.cpp new file mode 100644 index 0000000000000..f9e19ed35acc4 --- /dev/null +++ b/lldb/unittests/Core/ModuleSpecTest.cpp @@ -0,0 +1,166 @@ +//===-- ModuleSpecTest.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestingSupport/SubsystemRAII.h" +#include "TestingSupport/TestUtilities.h" + +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Utility/DataBuffer.h" + +#include "Plugins/ObjectFile/ELF/ObjectFileELF.h" +#include "Plugins/ObjectFile/Mach-O/ObjectFileMachO.h" +#include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h" + +#include "gtest/gtest.h" + +using namespace lldb; +using namespace lldb_private; + +extern const char *TestMainArgv0; + +// This test file intentionally doesn't initialize the FileSystem. +// Everything in this file should be able to run without requiring +// any interaction with the FileSystem class; by keeping it +// uninitialized, it will assert if anything tries to interact with +// it. + +TEST(ModuleSpecTest, InvalidInMemoryBuffer) { + uint8_t Invalid[] = "This is not a binary file."; + DataBufferSP InvalidBufferSP = + std::make_shared(Invalid, sizeof(Invalid)); + ModuleSpec Spec(FileSpec(), UUID(), InvalidBufferSP); + + auto InvalidModuleSP = std::make_shared(Spec); + ASSERT_EQ(InvalidModuleSP->GetObjectFile(), nullptr); +} + +TEST(ModuleSpecTest, InvalidInMemoryBufferValidFile) { + uint8_t Invalid[] = "This is not a binary file."; + DataBufferSP InvalidBufferSP = + std::make_shared(Invalid, sizeof(Invalid)); + ModuleSpec Spec(FileSpec(TestMainArgv0), UUID(), InvalidBufferSP); + + auto InvalidModuleSP = std::make_shared(Spec); + ASSERT_EQ(InvalidModuleSP->GetObjectFile(), nullptr); +} + +TEST(ModuleSpecTest, TestELFFile) { + SubsystemRAII subsystems; + + auto ExpectedFile = TestFile::fromYaml(R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x0000000000000010 +... 
+)"); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + + auto M = std::make_shared(ExpectedFile->moduleSpec()); + ObjectFile *OF = M->GetObjectFile(); + + ASSERT_EQ(llvm::isa(OF), true); +} + +TEST(ModuleSpecTest, TestCOFFFile) { + SubsystemRAII subsystems; + + auto ExpectedFile = TestFile::fromYaml(R"( +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 0 + ImageBase: 16777216 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA, IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 4096 +symbols: [] +... 
+)"); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + + auto M = std::make_shared(ExpectedFile->moduleSpec()); + ObjectFile *OF = M->GetObjectFile(); + + ASSERT_EQ(llvm::isa(OF), true); +} + +TEST(ModuleSpecTest, TestMachOFile) { + SubsystemRAII subsystems; + + auto ExpectedFile = TestFile::fromYaml(R"( +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x0100000C + cpusubtype: 0x00000000 + filetype: 0x00000001 + ncmds: 1 + sizeofcmds: 232 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: '' + vmaddr: 0 + vmsize: 56 + fileoff: 392 + filesize: 56 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 24 + offset: 0x00000188 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 +... +)"); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + + auto M = std::make_shared(ExpectedFile->moduleSpec()); + ObjectFile *OF = M->GetObjectFile(); + + ASSERT_EQ(llvm::isa(OF), true); +} diff --git a/lldb/unittests/ObjectFile/CMakeLists.txt b/lldb/unittests/ObjectFile/CMakeLists.txt index a9b42ea3199d7..b5d248e3965d3 100644 --- a/lldb/unittests/ObjectFile/CMakeLists.txt +++ b/lldb/unittests/ObjectFile/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(Breakpad) add_subdirectory(ELF) +add_subdirectory(MachO) add_subdirectory(PECOFF) diff --git a/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp b/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp index b9a650d5fafaf..9718ad3d27e95 100644 --- a/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp +++ b/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp @@ -91,10 +91,7 @@ TEST_F(ObjectFileELFTest, SectionsResolveConsistently) { )"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); - ModuleSpec spec{FileSpec(ExpectedFile->name())}; - spec.GetSymbolFileSpec().SetFile(ExpectedFile->name(), - 
FileSpec::Style::native); - auto module_sp = std::make_shared(spec); + auto module_sp = std::make_shared(ExpectedFile->moduleSpec()); SectionList *list = module_sp->GetSectionList(); ASSERT_NE(nullptr, list); @@ -212,10 +209,7 @@ TEST_F(ObjectFileELFTest, GetSymtab_NoSymEntryPointArmThumbAddressClass) { )"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); - ModuleSpec spec{FileSpec(ExpectedFile->name())}; - spec.GetSymbolFileSpec().SetFile(ExpectedFile->name(), - FileSpec::Style::native); - auto module_sp = std::make_shared(spec); + auto module_sp = std::make_shared(ExpectedFile->moduleSpec()); auto entry_point_addr = module_sp->GetObjectFile()->GetEntryPointAddress(); ASSERT_TRUE(entry_point_addr.GetOffset() & 1); @@ -277,10 +271,7 @@ TEST_F(ObjectFileELFTest, GetSymtab_NoSymEntryPointArmAddressClass) { )"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); - ModuleSpec spec{FileSpec(ExpectedFile->name())}; - spec.GetSymbolFileSpec().SetFile(ExpectedFile->name(), - FileSpec::Style::native); - auto module_sp = std::make_shared(spec); + auto module_sp = std::make_shared(ExpectedFile->moduleSpec()); auto entry_point_addr = module_sp->GetObjectFile()->GetEntryPointAddress(); ASSERT_EQ(entry_point_addr.GetAddressClass(), AddressClass::eCode); diff --git a/lldb/unittests/ObjectFile/MachO/CMakeLists.txt b/lldb/unittests/ObjectFile/MachO/CMakeLists.txt new file mode 100644 index 0000000000000..b6c4225114a36 --- /dev/null +++ b/lldb/unittests/ObjectFile/MachO/CMakeLists.txt @@ -0,0 +1,10 @@ +add_lldb_unittest(ObjectFileMachOTests + TestObjectFileMachO.cpp + + LINK_LIBS + lldbPluginObjectFileMachO + lldbPluginSymbolFileSymtab + lldbCore + lldbUtilityHelpers + LLVMTestingSupport + ) diff --git a/lldb/unittests/ObjectFile/MachO/TestObjectFileMachO.cpp b/lldb/unittests/ObjectFile/MachO/TestObjectFileMachO.cpp new file mode 100644 index 0000000000000..119be3822ccb9 --- /dev/null +++ b/lldb/unittests/ObjectFile/MachO/TestObjectFileMachO.cpp @@ -0,0 +1,79 @@ +//===-- 
ObjectFileMachOTest.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Host/HostInfo.h" +#include "Plugins/ObjectFile/Mach-O/ObjectFileMachO.h" +#include "TestingSupport/SubsystemRAII.h" +#include "TestingSupport/TestUtilities.h" +#include "lldb/Core/Module.h" +#include "lldb/Host/FileSystem.h" +#include "lldb/lldb-defines.h" +#include "gtest/gtest.h" + +#ifdef __APPLE__ +#include +#endif + +using namespace lldb_private; +using namespace llvm; + +namespace { +class ObjectFileMachOTest : public ::testing::Test { + SubsystemRAII subsystems; +}; +} // namespace + +#if defined(__APPLE__) +TEST_F(ObjectFileMachOTest, ModuleFromSharedCacheInfo) { + SharedCacheImageInfo image_info = + HostInfo::GetSharedCacheImageInfo("/usr/lib/libobjc.A.dylib"); + EXPECT_TRUE(image_info.uuid); + EXPECT_TRUE(image_info.data_sp); + + ModuleSpec spec(FileSpec(), UUID(), image_info.data_sp); + lldb::ModuleSP module = std::make_shared(spec); + ObjectFile *OF = module->GetObjectFile(); + ASSERT_TRUE(llvm::isa(OF)); + EXPECT_TRUE( + OF->GetArchitecture().IsCompatibleMatch(HostInfo::GetArchitecture())); + Symtab *symtab = OF->GetSymtab(); + ASSERT_NE(symtab, nullptr); + void *libobjc = dlopen("/usr/lib/libobjc.A.dylib", RTLD_LAZY); + ASSERT_NE(libobjc, nullptr); + + // This function checks that if we read something from the + // ObjectFile we get through the shared cache in-mmeory + // buffer, it matches what we get by reading directly the + // memory of the symbol. 
+ auto check_symbol = [&](const char *sym_name) { + std::vector symbol_indices; + symtab->FindAllSymbolsWithNameAndType(ConstString(sym_name), + lldb::eSymbolTypeAny, symbol_indices); + EXPECT_EQ(symbol_indices.size(), 1u); + + Symbol *sym = symtab->SymbolAtIndex(symbol_indices[0]); + ASSERT_NE(sym, nullptr); + Address base = sym->GetAddress(); + size_t size = sym->GetByteSize(); + ASSERT_NE(size, 0u); + uint8_t buffer[size]; + EXPECT_EQ(OF->ReadSectionData(base.GetSection().get(), base.GetOffset(), + buffer, size), + size); + + void *sym_addr = dlsym(libobjc, sym_name); + ASSERT_NE(sym_addr, nullptr); + EXPECT_EQ(memcmp(buffer, sym_addr, size), 0); + }; + + // Read a symbol from the __TEXT segment... + check_symbol("objc_msgSend"); + // ... and one from the __DATA segment + check_symbol("OBJC_CLASS_$_NSObject"); +} +#endif diff --git a/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp b/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp index 1c6fdd301bf85..e842df5988867 100644 --- a/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp +++ b/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp @@ -192,7 +192,7 @@ symbols: [] )"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); - ModuleSP module_sp = std::make_shared(ModuleSpec(FileSpec(ExpectedFile->name()))); + ModuleSP module_sp = std::make_shared(ExpectedFile->moduleSpec()); ObjectFile *object_file = module_sp->GetObjectFile(); ASSERT_NE(object_file, nullptr); diff --git a/lldb/unittests/Symbol/TestDWARFCallFrameInfo.cpp b/lldb/unittests/Symbol/TestDWARFCallFrameInfo.cpp index bc2de074806b3..86a6cf0cacb14 100644 --- a/lldb/unittests/Symbol/TestDWARFCallFrameInfo.cpp +++ b/lldb/unittests/Symbol/TestDWARFCallFrameInfo.cpp @@ -220,8 +220,7 @@ void DWARFCallFrameInfoTest::TestBasic(DWARFCallFrameInfo::Type type, )"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); - auto module_sp = - std::make_shared(ModuleSpec(FileSpec(ExpectedFile->name()))); + auto module_sp = 
std::make_shared(ExpectedFile->moduleSpec()); SectionList *list = module_sp->GetSectionList(); ASSERT_NE(nullptr, list); diff --git a/lldb/unittests/Symbol/TestLineEntry.cpp b/lldb/unittests/Symbol/TestLineEntry.cpp index 389b338faa421..d32ec9b1e7c6d 100644 --- a/lldb/unittests/Symbol/TestLineEntry.cpp +++ b/lldb/unittests/Symbol/TestLineEntry.cpp @@ -49,7 +49,7 @@ void LineEntryTest::SetUp() { auto ExpectedFile = TestFile::fromYamlFile("inlined-functions.yaml"); ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); m_file.emplace(std::move(*ExpectedFile)); - m_module_sp = std::make_shared(ModuleSpec(FileSpec(m_file->name()))); + m_module_sp = std::make_shared(m_file->moduleSpec()); } llvm::Expected LineEntryTest::GetLineEntryForLine(uint32_t line) { diff --git a/lldb/unittests/Target/StackFrameRecognizerTest.cpp b/lldb/unittests/Target/StackFrameRecognizerTest.cpp index 067a56a19902b..bf458b3b14145 100644 --- a/lldb/unittests/Target/StackFrameRecognizerTest.cpp +++ b/lldb/unittests/Target/StackFrameRecognizerTest.cpp @@ -51,18 +51,14 @@ class DummyStackFrameRecognizer : public StackFrameRecognizer { std::string GetName() override { return "Dummy StackFrame Recognizer"; } }; -void RegisterDummyStackFrameRecognizer() { - static llvm::once_flag g_once_flag; +void RegisterDummyStackFrameRecognizer(StackFrameRecognizerManager &manager) { + RegularExpressionSP module_regex_sp = nullptr; + RegularExpressionSP symbol_regex_sp(new RegularExpression("boom")); - llvm::call_once(g_once_flag, []() { - RegularExpressionSP module_regex_sp = nullptr; - RegularExpressionSP symbol_regex_sp(new RegularExpression("boom")); + StackFrameRecognizerSP dummy_recognizer_sp(new DummyStackFrameRecognizer()); - StackFrameRecognizerSP dummy_recognizer_sp(new DummyStackFrameRecognizer()); - - StackFrameRecognizerManager::AddRecognizer( - dummy_recognizer_sp, module_regex_sp, symbol_regex_sp, false); - }); + manager.AddRecognizer(dummy_recognizer_sp, module_regex_sp, symbol_regex_sp, + false); 
} } // namespace @@ -71,13 +67,15 @@ TEST_F(StackFrameRecognizerTest, NullModuleRegex) { DebuggerSP debugger_sp = Debugger::CreateInstance(); ASSERT_TRUE(debugger_sp); - RegisterDummyStackFrameRecognizer(); + StackFrameRecognizerManager manager; + + RegisterDummyStackFrameRecognizer(manager); bool any_printed = false; - StackFrameRecognizerManager::ForEach( - [&any_printed](uint32_t recognizer_id, std::string name, - std::string function, llvm::ArrayRef symbols, - bool regexp) { any_printed = true; }); + manager.ForEach([&any_printed](uint32_t recognizer_id, std::string name, + std::string function, + llvm::ArrayRef symbols, + bool regexp) { any_printed = true; }); EXPECT_TRUE(any_printed); } diff --git a/lldb/unittests/TestingSupport/TestUtilities.cpp b/lldb/unittests/TestingSupport/TestUtilities.cpp index d40ae9dd99250..34f49e5862a7a 100644 --- a/lldb/unittests/TestingSupport/TestUtilities.cpp +++ b/lldb/unittests/TestingSupport/TestUtilities.cpp @@ -27,23 +27,13 @@ std::string lldb_private::GetInputFilePath(const llvm::Twine &name) { } llvm::Expected TestFile::fromYaml(llvm::StringRef Yaml) { - const auto *Info = testing::UnitTest::GetInstance()->current_test_info(); - assert(Info); - llvm::SmallString<128> Name; - int FD; - if (std::error_code EC = llvm::sys::fs::createTemporaryFile( - llvm::Twine(Info->test_case_name()) + "-" + Info->name(), "test", FD, - Name)) - return llvm::errorCodeToError(EC); - llvm::FileRemover Remover(Name); - { - llvm::raw_fd_ostream OS(FD, /*shouldClose*/ true); - llvm::yaml::Input YIn(Yaml); - if (!llvm::yaml::convertYAML(YIn, OS, [](const llvm::Twine &Msg) {})) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "convertYAML() failed"); - } - return TestFile(Name, std::move(Remover)); + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + llvm::yaml::Input YIn(Yaml); + if (!llvm::yaml::convertYAML(YIn, OS, [](const llvm::Twine &Msg) {})) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + 
"convertYAML() failed"); + return TestFile(std::move(Buffer)); } llvm::Expected TestFile::fromYamlFile(const llvm::Twine &Name) { @@ -54,12 +44,3 @@ llvm::Expected TestFile::fromYamlFile(const llvm::Twine &Name) { return llvm::errorCodeToError(BufferOrError.getError()); return fromYaml(BufferOrError.get()->getBuffer()); } - -TestFile::~TestFile() { - if (!Name) - return; - if (std::error_code EC = - llvm::sys::fs::remove(*Name, /*IgnoreNonExisting*/ false)) - GTEST_LOG_(WARNING) << "Failed to delete `" << Name->c_str() - << "`: " << EC.message(); -} diff --git a/lldb/unittests/TestingSupport/TestUtilities.h b/lldb/unittests/TestingSupport/TestUtilities.h index 852c87ed3d9df..60a07119e9243 100644 --- a/lldb/unittests/TestingSupport/TestUtilities.h +++ b/lldb/unittests/TestingSupport/TestUtilities.h @@ -9,6 +9,8 @@ #ifndef LLDB_UNITTESTS_TESTINGSUPPORT_TESTUTILITIES_H #define LLDB_UNITTESTS_TESTINGSUPPORT_TESTUTILITIES_H +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Utility/DataBuffer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" @@ -34,22 +36,24 @@ class TestFile { static llvm::Expected fromYaml(llvm::StringRef Yaml); static llvm::Expected fromYamlFile(const llvm::Twine &Name); - TestFile(TestFile &&RHS) : Name(std::move(RHS.Name)) { - RHS.Name = llvm::None; + ~TestFile() = default; + + ModuleSpec moduleSpec() { + return ModuleSpec(FileSpec(), UUID(), dataBuffer()); } - ~TestFile(); +private: + TestFile(std::string &&Buffer) : Buffer(std::move(Buffer)) {} - llvm::StringRef name() { return *Name; } + void operator=(const TestFile &) = delete; -private: - TestFile(llvm::StringRef Name, llvm::FileRemover &&Remover) - : Name(std::string(Name)) { - Remover.releaseFile(); + lldb::DataBufferSP dataBuffer() { + auto *Data = reinterpret_cast(Buffer.data()); + return std::make_shared(const_cast(Data), + Buffer.size()); } - void operator=(const TestFile &) = delete; - llvm::Optional Name; + std::string Buffer; }; } diff 
--git a/lldb/unittests/Utility/ScalarTest.cpp b/lldb/unittests/Utility/ScalarTest.cpp index 42a2f2aaebf2f..dd4683145b968 100644 --- a/lldb/unittests/Utility/ScalarTest.cpp +++ b/lldb/unittests/Utility/ScalarTest.cpp @@ -334,6 +334,20 @@ TEST(ScalarTest, SetValueFromCString) { EXPECT_THAT_ERROR( a.SetValueFromCString("-123", lldb::eEncodingUint, 8).ToError(), Failed()); + EXPECT_THAT_ERROR( + a.SetValueFromCString("-2147483648", lldb::eEncodingSint, 4).ToError(), + Succeeded()); + EXPECT_EQ(-2147483648, a); + EXPECT_THAT_ERROR( + a.SetValueFromCString("-2147483649", lldb::eEncodingSint, 4).ToError(), + Failed()); + EXPECT_THAT_ERROR( + a.SetValueFromCString("47.25", lldb::eEncodingIEEE754, 4).ToError(), + Succeeded()); + EXPECT_EQ(47.25f, a); + EXPECT_THAT_ERROR( + a.SetValueFromCString("asdf", lldb::eEncodingIEEE754, 4).ToError(), + Failed()); } TEST(ScalarTest, APIntConstructor) { diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 0278c370f7fe1..0ef60c1427610 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -26,6 +26,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") # Remaining ones must be paths to the provided LLVM build-tree. 
if(${config_type} IN_LIST LLVM_CONFIGURATION_TYPES) @@ -37,6 +38,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") else() # Single-configuration generator like Ninja. @@ -47,6 +49,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ_CONFIGURED}") string(REPLACE ${CMAKE_CFG_INTDIR} "." 
LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") endif() @@ -65,6 +68,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") configure_file( @@ -80,6 +84,7 @@ else() set(LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") set(LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") set(LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + set(LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") set(LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") configure_file( diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in index 36d5fd38cc6d5..ee0ea6dff748c 100755 --- a/lldb/utils/lldb-dotest/lldb-dotest.in +++ b/lldb/utils/lldb-dotest/lldb-dotest.in @@ -10,6 +10,7 @@ executable = '@LLDB_TEST_EXECUTABLE_CONFIGURED@' compiler = '@LLDB_TEST_COMPILER_CONFIGURED@' dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@' filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@' +yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@' lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@" @@ -24,6 +25,7 @@ if __name__ == '__main__': cmd.extend(['--executable', executable]) cmd.extend(['--compiler', compiler]) cmd.extend(['--dsymutil', dsymutil]) + cmd.extend(['--yaml2obj', yaml2obj]) cmd.extend(['--filecheck', filecheck]) cmd.extend(['--lldb-libs-dir', lldb_libs_dir]) if lldb_build_intel_pt == "1": diff --git a/llvm-spirv/include/LLVMSPIRVLib.h b/llvm-spirv/include/LLVMSPIRVLib.h index 3e8a9f0908828..2fe5b3e8720c3 
100644 --- a/llvm-spirv/include/LLVMSPIRVLib.h +++ b/llvm-spirv/include/LLVMSPIRVLib.h @@ -215,6 +215,11 @@ ModulePass *createPreprocessMetadata(); /// ostream. ModulePass *createSPIRVWriterPass(std::ostream &Str); +/// Create and return a pass that writes the module to the specified +/// ostream. +ModulePass *createSPIRVWriterPass(std::ostream &Str, + const SPIRV::TranslatorOpts &Opts); + } // namespace llvm #endif // SPIRV_H diff --git a/llvm-spirv/include/LLVMSPIRVOpts.h b/llvm-spirv/include/LLVMSPIRVOpts.h index 62831d0c4b6b7..6abbf15eb9bbe 100644 --- a/llvm-spirv/include/LLVMSPIRVOpts.h +++ b/llvm-spirv/include/LLVMSPIRVOpts.h @@ -137,6 +137,15 @@ class TranslatorOpts { FPContractMode getFPContractMode() const { return FPCMode; } + bool isSPIRVAllowUnknownIntrinsicsEnabled() const noexcept { + return SPIRVAllowUnknownIntrinsics; + } + + void + setSPIRVAllowUnknownIntrinsicsEnabled(bool AllowUnknownIntrinsics) noexcept { + SPIRVAllowUnknownIntrinsics = AllowUnknownIntrinsics; + } + private: // Common translation options VersionNumber MaxVersion = VersionNumber::MaximumVersion; @@ -159,6 +168,10 @@ class TranslatorOpts { // - FPContractMode::Fast allows *all* operations to be contracted // for all entry points FPContractMode FPCMode = FPContractMode::On; + + // Unknown LLVM intrinsics will be translated as external function calls in + // SPIR-V + bool SPIRVAllowUnknownIntrinsics = false; }; } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp index 1c60fa8af22a7..bb1f47d9129c6 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp @@ -58,8 +58,7 @@ namespace SPIRV { char OCLTypeToSPIRV::ID = 0; -OCLTypeToSPIRV::OCLTypeToSPIRV() - : ModulePass(ID), M(nullptr), Ctx(nullptr), CLVer(0) { +OCLTypeToSPIRV::OCLTypeToSPIRV() : ModulePass(ID), M(nullptr), Ctx(nullptr) { initializeOCLTypeToSPIRVPass(*PassRegistry::getPassRegistry()); } diff --git 
a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h index 30025cd2aba1f..00d3104d0e780 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h @@ -71,7 +71,6 @@ class OCLTypeToSPIRV : public ModulePass { private: Module *M; LLVMContext *Ctx; - unsigned CLVer; std::map AdaptedTy; // Adapted types for values std::set WorkSet; // Functions to be adapted diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index eff0cda426198..cb230ce01f528 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -96,11 +96,548 @@ namespace OCLUtil { #define SPIRV_IMAGE_ADDR_SPACE SPIRAS_Global #endif +} // namespace OCLUtil + +/////////////////////////////////////////////////////////////////////////////// +// +// Map definitions +// +/////////////////////////////////////////////////////////////////////////////// + +using namespace OCLUtil; +namespace SPIRV { + +template <> void SPIRVMap::init() { + add(OCLMF_Local, MemorySemanticsWorkgroupMemoryMask); + add(OCLMF_Global, MemorySemanticsCrossWorkgroupMemoryMask); + add(OCLMF_Image, MemorySemanticsImageMemoryMask); +} + +template <> +void SPIRVMap::init() { + add(OCLMFEx_Local, MemorySemanticsWorkgroupMemoryMask); + add(OCLMFEx_Global, MemorySemanticsCrossWorkgroupMemoryMask); + add(OCLMFEx_Local_Global, MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask); + add(OCLMFEx_Image, MemorySemanticsImageMemoryMask); + add(OCLMFEx_Image_Local, + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); + add(OCLMFEx_Image_Global, + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); + add(OCLMFEx_Image_Local_Global, MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | + MemorySemanticsImageMemoryMask); +} + +template <> +void SPIRVMap::init() { + add(OCLMO_relaxed, MemorySemanticsMaskNone); + add(OCLMO_acquire, 
MemorySemanticsAcquireMask); + add(OCLMO_release, MemorySemanticsReleaseMask); + add(OCLMO_acq_rel, MemorySemanticsAcquireReleaseMask); + add(OCLMO_seq_cst, MemorySemanticsSequentiallyConsistentMask); +} + +template <> void SPIRVMap::init() { + add(OCLMS_work_item, ScopeInvocation); + add(OCLMS_work_group, ScopeWorkgroup); + add(OCLMS_device, ScopeDevice); + add(OCLMS_all_svm_devices, ScopeCrossDevice); + add(OCLMS_sub_group, ScopeSubgroup); +} + +template <> void SPIRVMap::init() { + add("reduce", GroupOperationReduce); + add("scan_inclusive", GroupOperationInclusiveScan); + add("scan_exclusive", GroupOperationExclusiveScan); + add("ballot_bit_count", GroupOperationReduce); + add("ballot_inclusive_scan", GroupOperationInclusiveScan); + add("ballot_exclusive_scan", GroupOperationExclusiveScan); + add("non_uniform_reduce", GroupOperationReduce); + add("non_uniform_scan_inclusive", GroupOperationInclusiveScan); + add("non_uniform_scan_exclusive", GroupOperationExclusiveScan); + add("non_uniform_reduce_logical", GroupOperationReduce); + add("non_uniform_scan_inclusive_logical", GroupOperationInclusiveScan); + add("non_uniform_scan_exclusive_logical", GroupOperationExclusiveScan); + add("clustered_reduce", GroupOperationClusteredReduce); +} + +template <> void SPIRVMap::init() { + add("rte", FPRoundingModeRTE); + add("rtz", FPRoundingModeRTZ); + add("rtp", FPRoundingModeRTP); + add("rtn", FPRoundingModeRTN); +} + +template <> void SPIRVMap::init() { +#define _SPIRV_OP(x) add(OclExt::x, #x); + _SPIRV_OP(cl_images) + _SPIRV_OP(cl_doubles) + _SPIRV_OP(cl_khr_int64_base_atomics) + _SPIRV_OP(cl_khr_int64_extended_atomics) + _SPIRV_OP(cl_khr_fp16) + _SPIRV_OP(cl_khr_gl_sharing) + _SPIRV_OP(cl_khr_gl_event) + _SPIRV_OP(cl_khr_d3d10_sharing) + _SPIRV_OP(cl_khr_media_sharing) + _SPIRV_OP(cl_khr_d3d11_sharing) + _SPIRV_OP(cl_khr_global_int32_base_atomics) + _SPIRV_OP(cl_khr_global_int32_extended_atomics) + _SPIRV_OP(cl_khr_local_int32_base_atomics) + 
_SPIRV_OP(cl_khr_local_int32_extended_atomics) + _SPIRV_OP(cl_khr_byte_addressable_store) + _SPIRV_OP(cl_khr_3d_image_writes) + _SPIRV_OP(cl_khr_gl_msaa_sharing) + _SPIRV_OP(cl_khr_depth_images) + _SPIRV_OP(cl_khr_gl_depth_images) + _SPIRV_OP(cl_khr_subgroups) + _SPIRV_OP(cl_khr_mipmap_image) + _SPIRV_OP(cl_khr_mipmap_image_writes) + _SPIRV_OP(cl_khr_egl_event) + _SPIRV_OP(cl_khr_srgb_image_writes) +#undef _SPIRV_OP +} + +template <> void SPIRVMap::init() { + add(OclExt::cl_images, CapabilityImageBasic); + add(OclExt::cl_doubles, CapabilityFloat64); + add(OclExt::cl_khr_int64_base_atomics, CapabilityInt64Atomics); + add(OclExt::cl_khr_int64_extended_atomics, CapabilityInt64Atomics); + add(OclExt::cl_khr_fp16, CapabilityFloat16); + add(OclExt::cl_khr_subgroups, CapabilityGroups); + add(OclExt::cl_khr_mipmap_image, CapabilityImageMipmap); + add(OclExt::cl_khr_mipmap_image_writes, CapabilityImageMipmap); +} + +/// Map OpenCL work functions to SPIR-V builtin variables. +template <> void SPIRVMap::init() { + add("get_work_dim", BuiltInWorkDim); + add("get_global_size", BuiltInGlobalSize); + add("get_global_id", BuiltInGlobalInvocationId); + add("get_global_offset", BuiltInGlobalOffset); + add("get_local_size", BuiltInWorkgroupSize); + add("get_enqueued_local_size", BuiltInEnqueuedWorkgroupSize); + add("get_local_id", BuiltInLocalInvocationId); + add("get_num_groups", BuiltInNumWorkgroups); + add("get_group_id", BuiltInWorkgroupId); + add("get_global_linear_id", BuiltInGlobalLinearId); + add("get_local_linear_id", BuiltInLocalInvocationIndex); + // cl_khr_subgroups + add("get_sub_group_size", BuiltInSubgroupSize); + add("get_max_sub_group_size", BuiltInSubgroupMaxSize); + add("get_num_sub_groups", BuiltInNumSubgroups); + add("get_enqueued_num_sub_groups", BuiltInNumEnqueuedSubgroups); + add("get_sub_group_id", BuiltInSubgroupId); + add("get_sub_group_local_id", BuiltInSubgroupLocalInvocationId); + // cl_khr_subgroup_ballot + add("get_sub_group_eq_mask", 
BuiltInSubgroupEqMask); + add("get_sub_group_ge_mask", BuiltInSubgroupGeMask); + add("get_sub_group_gt_mask", BuiltInSubgroupGtMask); + add("get_sub_group_le_mask", BuiltInSubgroupLeMask); + add("get_sub_group_lt_mask", BuiltInSubgroupLtMask); +} + +// Maps uniqued OCL builtin function name to SPIR-V op code. +// A uniqued OCL builtin function name may be different from the real +// OCL builtin function name. e.g. instead of atomic_min, atomic_umin +// is used for atomic_min with unsigned integer parameter. +// work_group_ and sub_group_ functions are unified as group_ functions +// except work_group_barrier. +class SPIRVInstruction; +template <> void SPIRVMap::init() { +#define _SPIRV_OP(x, y) add("atom_" #x, OpAtomic##y); + // cl_khr_int64_base_atomics builtins + _SPIRV_OP(add, IAdd) + _SPIRV_OP(sub, ISub) + _SPIRV_OP(xchg, Exchange) + _SPIRV_OP(dec, IDecrement) + _SPIRV_OP(inc, IIncrement) + _SPIRV_OP(cmpxchg, CompareExchange) + // cl_khr_int64_extended_atomics builtins + _SPIRV_OP(min, SMin) + _SPIRV_OP(max, SMax) + _SPIRV_OP(and, And) + _SPIRV_OP(or, Or) + _SPIRV_OP(xor, Xor) +#undef _SPIRV_OP +#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y); + // CL 2.0 atomic builtins + _SPIRV_OP(flag_test_and_set_explicit, AtomicFlagTestAndSet) + _SPIRV_OP(flag_clear_explicit, AtomicFlagClear) + _SPIRV_OP(load_explicit, AtomicLoad) + _SPIRV_OP(store_explicit, AtomicStore) + _SPIRV_OP(exchange_explicit, AtomicExchange) + _SPIRV_OP(compare_exchange_strong_explicit, AtomicCompareExchange) + _SPIRV_OP(compare_exchange_weak_explicit, AtomicCompareExchangeWeak) + _SPIRV_OP(inc, AtomicIIncrement) + _SPIRV_OP(dec, AtomicIDecrement) + _SPIRV_OP(fetch_add_explicit, AtomicIAdd) + _SPIRV_OP(fetch_sub_explicit, AtomicISub) + _SPIRV_OP(fetch_umin_explicit, AtomicUMin) + _SPIRV_OP(fetch_umax_explicit, AtomicUMax) + _SPIRV_OP(fetch_min_explicit, AtomicSMin) + _SPIRV_OP(fetch_max_explicit, AtomicSMax) + _SPIRV_OP(fetch_and_explicit, AtomicAnd) + _SPIRV_OP(fetch_or_explicit, AtomicOr) + 
_SPIRV_OP(fetch_xor_explicit, AtomicXor) +#undef _SPIRV_OP +#define _SPIRV_OP(x, y) add(#x, Op##y); + _SPIRV_OP(dot, Dot) + _SPIRV_OP(async_work_group_copy, GroupAsyncCopy) + _SPIRV_OP(async_work_group_strided_copy, GroupAsyncCopy) + _SPIRV_OP(wait_group_events, GroupWaitEvents) + _SPIRV_OP(isequal, FOrdEqual) + _SPIRV_OP(isnotequal, FUnordNotEqual) + _SPIRV_OP(isgreater, FOrdGreaterThan) + _SPIRV_OP(isgreaterequal, FOrdGreaterThanEqual) + _SPIRV_OP(isless, FOrdLessThan) + _SPIRV_OP(islessequal, FOrdLessThanEqual) + _SPIRV_OP(islessgreater, LessOrGreater) + _SPIRV_OP(isordered, Ordered) + _SPIRV_OP(isunordered, Unordered) + _SPIRV_OP(isfinite, IsFinite) + _SPIRV_OP(isinf, IsInf) + _SPIRV_OP(isnan, IsNan) + _SPIRV_OP(isnormal, IsNormal) + _SPIRV_OP(signbit, SignBitSet) + _SPIRV_OP(any, Any) + _SPIRV_OP(all, All) + _SPIRV_OP(popcount, BitCount) + _SPIRV_OP(get_fence, GenericPtrMemSemantics) + // CL 2.0 kernel enqueue builtins + _SPIRV_OP(enqueue_marker, EnqueueMarker) + _SPIRV_OP(enqueue_kernel, EnqueueKernel) + _SPIRV_OP(get_kernel_sub_group_count_for_ndrange_impl, + GetKernelNDrangeSubGroupCount) + _SPIRV_OP(get_kernel_max_sub_group_size_for_ndrange_impl, + GetKernelNDrangeMaxSubGroupSize) + _SPIRV_OP(get_kernel_work_group_size_impl, GetKernelWorkGroupSize) + _SPIRV_OP(get_kernel_preferred_work_group_size_multiple_impl, + GetKernelPreferredWorkGroupSizeMultiple) + _SPIRV_OP(retain_event, RetainEvent) + _SPIRV_OP(release_event, ReleaseEvent) + _SPIRV_OP(create_user_event, CreateUserEvent) + _SPIRV_OP(is_valid_event, IsValidEvent) + _SPIRV_OP(set_user_event_status, SetUserEventStatus) + _SPIRV_OP(capture_event_profiling_info, CaptureEventProfilingInfo) + _SPIRV_OP(get_default_queue, GetDefaultQueue) + _SPIRV_OP(ndrange_1D, BuildNDRange) + _SPIRV_OP(ndrange_2D, BuildNDRange) + _SPIRV_OP(ndrange_3D, BuildNDRange) + // Generic Address Space Casts + _SPIRV_OP(to_global, GenericCastToPtrExplicit) + _SPIRV_OP(to_local, GenericCastToPtrExplicit) + _SPIRV_OP(to_private, 
GenericCastToPtrExplicit) + // CL 2.0 pipe builtins + _SPIRV_OP(read_pipe_2, ReadPipe) + _SPIRV_OP(write_pipe_2, WritePipe) + _SPIRV_OP(read_pipe_2_bl, ReadPipeBlockingINTEL) + _SPIRV_OP(write_pipe_2_bl, WritePipeBlockingINTEL) + _SPIRV_OP(read_pipe_4, ReservedReadPipe) + _SPIRV_OP(write_pipe_4, ReservedWritePipe) + _SPIRV_OP(reserve_read_pipe, ReserveReadPipePackets) + _SPIRV_OP(reserve_write_pipe, ReserveWritePipePackets) + _SPIRV_OP(commit_read_pipe, CommitReadPipe) + _SPIRV_OP(commit_write_pipe, CommitWritePipe) + _SPIRV_OP(is_valid_reserve_id, IsValidReserveId) + _SPIRV_OP(group_reserve_read_pipe, GroupReserveReadPipePackets) + _SPIRV_OP(group_reserve_write_pipe, GroupReserveWritePipePackets) + _SPIRV_OP(group_commit_read_pipe, GroupCommitReadPipe) + _SPIRV_OP(group_commit_write_pipe, GroupCommitWritePipe) + _SPIRV_OP(get_pipe_num_packets_ro, GetNumPipePackets) + _SPIRV_OP(get_pipe_num_packets_wo, GetNumPipePackets) + _SPIRV_OP(get_pipe_max_packets_ro, GetMaxPipePackets) + _SPIRV_OP(get_pipe_max_packets_wo, GetMaxPipePackets) + // CL 2.0 workgroup builtins + _SPIRV_OP(group_all, GroupAll) + _SPIRV_OP(group_any, GroupAny) + _SPIRV_OP(group_broadcast, GroupBroadcast) + _SPIRV_OP(group_iadd, GroupIAdd) + _SPIRV_OP(group_fadd, GroupFAdd) + _SPIRV_OP(group_fmin, GroupFMin) + _SPIRV_OP(group_umin, GroupUMin) + _SPIRV_OP(group_smin, GroupSMin) + _SPIRV_OP(group_fmax, GroupFMax) + _SPIRV_OP(group_umax, GroupUMax) + _SPIRV_OP(group_smax, GroupSMax) + // CL image builtins + _SPIRV_OP(SampledImage, SampledImage) + _SPIRV_OP(ImageSampleExplicitLod, ImageSampleExplicitLod) + _SPIRV_OP(read_image, ImageRead) + _SPIRV_OP(write_image, ImageWrite) + _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat) + _SPIRV_OP(get_image_channel_order, ImageQueryOrder) + _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels) + _SPIRV_OP(get_image_num_samples, ImageQuerySamples) + // Intel Subgroups builtins + _SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL) + 
_SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL) + _SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL) + _SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL) + // Intel media_block_io builtins + _SPIRV_OP(intel_sub_group_media_block_read, SubgroupImageMediaBlockReadINTEL) + _SPIRV_OP(intel_sub_group_media_block_write, + SubgroupImageMediaBlockWriteINTEL) + // cl_khr_subgroup_non_uniform_vote + _SPIRV_OP(group_elect, GroupNonUniformElect) + _SPIRV_OP(group_non_uniform_all, GroupNonUniformAll) + _SPIRV_OP(group_non_uniform_any, GroupNonUniformAny) + _SPIRV_OP(group_non_uniform_all_equal, GroupNonUniformAllEqual) + // cl_khr_subgroup_ballot + _SPIRV_OP(group_non_uniform_broadcast, GroupNonUniformBroadcast) + _SPIRV_OP(group_broadcast_first, GroupNonUniformBroadcastFirst) + _SPIRV_OP(group_ballot, GroupNonUniformBallot) + _SPIRV_OP(group_inverse_ballot, GroupNonUniformInverseBallot) + _SPIRV_OP(group_ballot_bit_extract, GroupNonUniformBallotBitExtract) + _SPIRV_OP(group_ballot_bit_count_iadd, GroupNonUniformBallotBitCount) + _SPIRV_OP(group_ballot_find_lsb, GroupNonUniformBallotFindLSB) + _SPIRV_OP(group_ballot_find_msb, GroupNonUniformBallotFindMSB) + // cl_khr_subgroup_non_uniform_arithmetic + _SPIRV_OP(group_non_uniform_iadd, GroupNonUniformIAdd) + _SPIRV_OP(group_non_uniform_fadd, GroupNonUniformFAdd) + _SPIRV_OP(group_non_uniform_imul, GroupNonUniformIMul) + _SPIRV_OP(group_non_uniform_fmul, GroupNonUniformFMul) + _SPIRV_OP(group_non_uniform_smin, GroupNonUniformSMin) + _SPIRV_OP(group_non_uniform_umin, GroupNonUniformUMin) + _SPIRV_OP(group_non_uniform_fmin, GroupNonUniformFMin) + _SPIRV_OP(group_non_uniform_smax, GroupNonUniformSMax) + _SPIRV_OP(group_non_uniform_umax, GroupNonUniformUMax) + _SPIRV_OP(group_non_uniform_fmax, GroupNonUniformFMax) + _SPIRV_OP(group_non_uniform_iand, GroupNonUniformBitwiseAnd) + _SPIRV_OP(group_non_uniform_ior, GroupNonUniformBitwiseOr) + _SPIRV_OP(group_non_uniform_ixor, 
GroupNonUniformBitwiseXor) + _SPIRV_OP(group_non_uniform_logical_iand, GroupNonUniformLogicalAnd) + _SPIRV_OP(group_non_uniform_logical_ior, GroupNonUniformLogicalOr) + _SPIRV_OP(group_non_uniform_logical_ixor, GroupNonUniformLogicalXor) + // cl_khr_subgroup_shuffle + _SPIRV_OP(group_shuffle, GroupNonUniformShuffle) + _SPIRV_OP(group_shuffle_xor, GroupNonUniformShuffleXor) + // cl_khr_subgroup_shuffle_relative + _SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp) + _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown) +#undef _SPIRV_OP +} + +template <> void SPIRVMap::init() { +#define _SPIRV_OP(x, y) add(#x, Op##y); + _SPIRV_OP(add, AtomicIAdd) + _SPIRV_OP(sub, AtomicISub) + _SPIRV_OP(xchg, AtomicExchange) + _SPIRV_OP(cmpxchg, AtomicCompareExchange) + _SPIRV_OP(inc, AtomicIIncrement) + _SPIRV_OP(dec, AtomicIDecrement) + _SPIRV_OP(min, AtomicSMin) + _SPIRV_OP(max, AtomicSMax) + _SPIRV_OP(umin, AtomicUMin) + _SPIRV_OP(umax, AtomicUMax) + _SPIRV_OP(and, AtomicAnd) + _SPIRV_OP(or, AtomicOr) + _SPIRV_OP(xor, AtomicXor) +#undef _SPIRV_OP +} + +// SPV_INTEL_device_side_avc_motion_estimation extension builtins +class SPIRVSubgroupsAVCIntelInst; +template <> void SPIRVMap::init() { + // Here is a workaround for a bug in the specification: + // 'avc' missed in 'intel_sub_group_avc' prefix. 
+ add("intel_sub_group_ime_ref_window_size", + OpSubgroupAvcImeRefWindowSizeINTEL); + +#define _SPIRV_OP(x, y) add("intel_sub_group_avc_" #x, OpSubgroupAvc##y##INTEL); + // Initialization phase functions + _SPIRV_OP(ime_initialize, ImeInitialize) + _SPIRV_OP(fme_initialize, FmeInitialize) + _SPIRV_OP(bme_initialize, BmeInitialize) + _SPIRV_OP(sic_initialize, SicInitialize) + + // Result and payload types conversion functions + _SPIRV_OP(mce_convert_to_ime_payload, MceConvertToImePayload) + _SPIRV_OP(mce_convert_to_ime_result, MceConvertToImeResult) + _SPIRV_OP(mce_convert_to_ref_payload, MceConvertToRefPayload) + _SPIRV_OP(mce_convert_to_ref_result, MceConvertToRefResult) + _SPIRV_OP(mce_convert_to_sic_payload, MceConvertToSicPayload) + _SPIRV_OP(mce_convert_to_sic_result, MceConvertToSicResult) + _SPIRV_OP(ime_convert_to_mce_payload, ImeConvertToMcePayload) + _SPIRV_OP(ime_convert_to_mce_result, ImeConvertToMceResult) + _SPIRV_OP(ref_convert_to_mce_payload, RefConvertToMcePayload) + _SPIRV_OP(ref_convert_to_mce_result, RefConvertToMceResult) + _SPIRV_OP(sic_convert_to_mce_payload, SicConvertToMcePayload) + _SPIRV_OP(sic_convert_to_mce_result, SicConvertToMceResult) +#undef _SPIRV_OP + +// MCE instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_mce_" #x, OpSubgroupAvcMce##y##INTEL); + _SPIRV_OP(get_default_inter_base_multi_reference_penalty, + GetDefaultInterBaseMultiReferencePenalty) + _SPIRV_OP(set_inter_base_multi_reference_penalty, + SetInterBaseMultiReferencePenalty) + _SPIRV_OP(get_default_inter_shape_penalty, GetDefaultInterShapePenalty) + _SPIRV_OP(set_inter_shape_penalty, SetInterShapePenalty) + _SPIRV_OP(get_default_inter_direction_penalty, + GetDefaultInterDirectionPenalty) + _SPIRV_OP(set_inter_direction_penalty, SetInterDirectionPenalty) + _SPIRV_OP(get_default_intra_luma_shape_penalty, + GetDefaultIntraLumaShapePenalty) + _SPIRV_OP(get_default_inter_motion_vector_cost_table, + GetDefaultInterMotionVectorCostTable) + 
_SPIRV_OP(get_default_high_penalty_cost_table, GetDefaultHighPenaltyCostTable) + _SPIRV_OP(get_default_medium_penalty_cost_table, + GetDefaultMediumPenaltyCostTable) + _SPIRV_OP(get_default_low_penalty_cost_table, GetDefaultLowPenaltyCostTable) + _SPIRV_OP(set_motion_vector_cost_function, SetMotionVectorCostFunction) + _SPIRV_OP(get_default_intra_luma_mode_penalty, GetDefaultIntraLumaModePenalty) + _SPIRV_OP(get_default_non_dc_luma_intra_penalty, + GetDefaultNonDcLumaIntraPenalty) + _SPIRV_OP(get_default_intra_chroma_mode_base_penalty, + GetDefaultIntraChromaModeBasePenalty) + _SPIRV_OP(set_ac_only_haar, SetAcOnlyHaar) + _SPIRV_OP(set_source_interlaced_field_polarity, + SetSourceInterlacedFieldPolarity) + _SPIRV_OP(set_single_reference_interlaced_field_polarity, + SetSingleReferenceInterlacedFieldPolarity) + _SPIRV_OP(set_dual_reference_interlaced_field_polarities, + SetDualReferenceInterlacedFieldPolarities) + _SPIRV_OP(get_motion_vectors, GetMotionVectors) + _SPIRV_OP(get_inter_distortions, GetInterDistortions) + _SPIRV_OP(get_best_inter_distortion, GetBestInterDistortions) + _SPIRV_OP(get_inter_major_shape, GetInterMajorShape) + _SPIRV_OP(get_inter_minor_shapes, GetInterMinorShape) + _SPIRV_OP(get_inter_directions, GetInterDirections) + _SPIRV_OP(get_inter_motion_vector_count, GetInterMotionVectorCount) + _SPIRV_OP(get_inter_reference_ids, GetInterReferenceIds) + _SPIRV_OP(get_inter_reference_interlaced_field_polarities, + GetInterReferenceInterlacedFieldPolarities) +#undef _SPIRV_OP + +// IME instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_ime_" #x, OpSubgroupAvcIme##y##INTEL); + _SPIRV_OP(set_single_reference, SetSingleReference) + _SPIRV_OP(set_dual_reference, SetDualReference) + _SPIRV_OP(ref_window_size, RefWindowSize) + _SPIRV_OP(adjust_ref_offset, AdjustRefOffset) + _SPIRV_OP(set_max_motion_vector_count, SetMaxMotionVectorCount) + _SPIRV_OP(set_unidirectional_mix_disable, SetUnidirectionalMixDisable) + 
_SPIRV_OP(set_early_search_termination_threshold, + SetEarlySearchTerminationThreshold) + _SPIRV_OP(set_weighted_sad, SetWeightedSad) + _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) + _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) + _SPIRV_OP(evaluate_with_single_reference_streamin, + EvaluateWithSingleReferenceStreamin) + _SPIRV_OP(evaluate_with_dual_reference_streamin, + EvaluateWithDualReferenceStreamin) + _SPIRV_OP(evaluate_with_single_reference_streamout, + EvaluateWithSingleReferenceStreamout) + _SPIRV_OP(evaluate_with_dual_reference_streamout, + EvaluateWithDualReferenceStreamout) + _SPIRV_OP(evaluate_with_single_reference_streaminout, + EvaluateWithSingleReferenceStreaminout) + _SPIRV_OP(evaluate_with_dual_reference_streaminout, + EvaluateWithDualReferenceStreaminout) + _SPIRV_OP(get_single_reference_streamin, GetSingleReferenceStreamin) + _SPIRV_OP(get_dual_reference_streamin, GetDualReferenceStreamin) + _SPIRV_OP(strip_single_reference_streamout, StripSingleReferenceStreamout) + _SPIRV_OP(strip_dual_reference_streamout, StripDualReferenceStreamout) + _SPIRV_OP(get_border_reached, GetBorderReached) + _SPIRV_OP(get_truncated_search_indication, GetTruncatedSearchIndication) + _SPIRV_OP(get_unidirectional_early_search_termination, + GetUnidirectionalEarlySearchTermination) + _SPIRV_OP(get_weighting_pattern_minimum_motion_vector, + GetWeightingPatternMinimumMotionVector) + _SPIRV_OP(get_weighting_pattern_minimum_distortion, + GetWeightingPatternMinimumDistortion) +#undef _SPIRV_OP + +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_ime_get_streamout_major_shape_" #x, \ + OpSubgroupAvcImeGetStreamout##y##INTEL); + _SPIRV_OP(motion_vectors_single_reference, + SingleReferenceMajorShapeMotionVectors) + _SPIRV_OP(distortions_single_reference, SingleReferenceMajorShapeDistortions) + _SPIRV_OP(reference_ids_single_reference, + SingleReferenceMajorShapeReferenceIds) + _SPIRV_OP(motion_vectors_dual_reference, 
DualReferenceMajorShapeMotionVectors) + _SPIRV_OP(distortions_dual_reference, DualReferenceMajorShapeDistortions) + _SPIRV_OP(reference_ids_dual_reference, DualReferenceMajorShapeReferenceIds) +#undef _SPIRV_OP + +// REF instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_ref_" #x, OpSubgroupAvcRef##y##INTEL); + _SPIRV_OP(set_bidirectional_mix_disable, SetBidirectionalMixDisable) + _SPIRV_OP(set_bilinear_filter_enable, SetBilinearFilterEnable) + _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) + _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) + _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) + _SPIRV_OP(evaluate_with_multi_reference_interlaced, + EvaluateWithMultiReferenceInterlaced) +#undef _SPIRV_OP + +// SIC instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_sic_" #x, OpSubgroupAvcSic##y##INTEL); + _SPIRV_OP(configure_skc, ConfigureSkc) + _SPIRV_OP(configure_ipe_luma, ConfigureIpeLuma) + _SPIRV_OP(configure_ipe_luma_chroma, ConfigureIpeLumaChroma) + _SPIRV_OP(get_motion_vector_mask, GetMotionVectorMask) + _SPIRV_OP(set_intra_luma_shape_penalty, SetIntraLumaShapePenalty) + _SPIRV_OP(set_intra_luma_mode_cost_function, SetIntraLumaModeCostFunction) + _SPIRV_OP(set_intra_chroma_mode_cost_function, SetIntraChromaModeCostFunction) + _SPIRV_OP(set_skc_bilinear_filter_enable, SetBilinearFilterEnable) + _SPIRV_OP(set_skc_forward_transform_enable, SetSkcForwardTransformEnable) + _SPIRV_OP(set_block_based_raw_skip_sad, SetBlockBasedRawSkipSad) + _SPIRV_OP(evaluate_ipe, EvaluateIpe) + _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) + _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) + _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) + _SPIRV_OP(evaluate_with_multi_reference_interlaced, + EvaluateWithMultiReferenceInterlaced) + _SPIRV_OP(get_ipe_luma_shape, GetIpeLumaShape) + _SPIRV_OP(get_best_ipe_luma_distortion, 
GetBestIpeLumaDistortion) + _SPIRV_OP(get_best_ipe_chroma_distortion, GetBestIpeChromaDistortion) + _SPIRV_OP(get_packed_ipe_luma_modes, GetPackedIpeLumaModes) + _SPIRV_OP(get_ipe_chroma_mode, GetIpeChromaMode) + _SPIRV_OP(get_packed_skc_luma_count_threshold, GetPackedSkcLumaCountThreshold) + _SPIRV_OP(get_packed_skc_luma_sum_threshold, GetPackedSkcLumaSumThreshold) + _SPIRV_OP(get_inter_raw_sads, GetInterRawSads) +#undef _SPIRV_OP +} + +template <> void SPIRVMap::init() { + add("opencl.event_t", OpTypeEvent); + add("opencl.pipe_t", OpTypePipe); + add("opencl.clk_event_t", OpTypeDeviceEvent); + add("opencl.reserve_id_t", OpTypeReserveId); + add("opencl.queue_t", OpTypeQueue); + add("opencl.sampler_t", OpTypeSampler); +} + +template <> void LLVMSPIRVAtomicRmwOpCodeMap::init() { + add(llvm::AtomicRMWInst::Xchg, OpAtomicExchange); + add(llvm::AtomicRMWInst::Add, OpAtomicIAdd); + add(llvm::AtomicRMWInst::Sub, OpAtomicISub); + add(llvm::AtomicRMWInst::And, OpAtomicAnd); + add(llvm::AtomicRMWInst::Or, OpAtomicOr); + add(llvm::AtomicRMWInst::Xor, OpAtomicXor); + add(llvm::AtomicRMWInst::Max, OpAtomicSMax); + add(llvm::AtomicRMWInst::Min, OpAtomicSMin); + add(llvm::AtomicRMWInst::UMax, OpAtomicUMax); + add(llvm::AtomicRMWInst::UMin, OpAtomicUMin); +} + +} // namespace SPIRV + /////////////////////////////////////////////////////////////////////////////// // // Functions for getting builtin call info // /////////////////////////////////////////////////////////////////////////////// + +namespace OCLUtil { + AtomicWorkItemFenceLiterals getAtomicWorkItemFenceLiterals(CallInst *CI) { return std::make_tuple(getArgAsInt(CI, 0), static_cast(getArgAsInt(CI, 1)), diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.h b/llvm-spirv/lib/SPIRV/OCLUtil.h index d91316e0a310f..3d4ae9b9ae2e5 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.h +++ b/llvm-spirv/lib/SPIRV/OCLUtil.h @@ -444,52 +444,8 @@ std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize, unsigned VectorNumElements); } // 
namespace OCLUtil -/////////////////////////////////////////////////////////////////////////////// -// -// Map definitions -// -/////////////////////////////////////////////////////////////////////////////// - using namespace OCLUtil; namespace SPIRV { -template <> inline void SPIRVMap::init() { - add(OCLMF_Local, MemorySemanticsWorkgroupMemoryMask); - add(OCLMF_Global, MemorySemanticsCrossWorkgroupMemoryMask); - add(OCLMF_Image, MemorySemanticsImageMemoryMask); -} - -template <> -inline void SPIRVMap::init() { - add(OCLMFEx_Local, MemorySemanticsWorkgroupMemoryMask); - add(OCLMFEx_Global, MemorySemanticsCrossWorkgroupMemoryMask); - add(OCLMFEx_Local_Global, MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsCrossWorkgroupMemoryMask); - add(OCLMFEx_Image, MemorySemanticsImageMemoryMask); - add(OCLMFEx_Image_Local, - MemorySemanticsWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); - add(OCLMFEx_Image_Global, - MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); - add(OCLMFEx_Image_Local_Global, MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsCrossWorkgroupMemoryMask | - MemorySemanticsImageMemoryMask); -} - -template <> -inline void SPIRVMap::init() { - add(OCLMO_relaxed, MemorySemanticsMaskNone); - add(OCLMO_acquire, MemorySemanticsAcquireMask); - add(OCLMO_release, MemorySemanticsReleaseMask); - add(OCLMO_acq_rel, MemorySemanticsAcquireReleaseMask); - add(OCLMO_seq_cst, MemorySemanticsSequentiallyConsistentMask); -} - -template <> inline void SPIRVMap::init() { - add(OCLMS_work_item, ScopeInvocation); - add(OCLMS_work_group, ScopeWorkgroup); - add(OCLMS_device, ScopeDevice); - add(OCLMS_all_svm_devices, ScopeCrossDevice); - add(OCLMS_sub_group, ScopeSubgroup); -} template Instruction * @@ -623,492 +579,11 @@ Value *transSPIRVMemorySemanticsIntoOCLMemoryOrder(Value *MemorySemantics, Value *transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Value *MemorySemantics, Instruction *InsertBefore); -template <> inline void SPIRVMap::init() { 
- add("reduce", GroupOperationReduce); - add("scan_inclusive", GroupOperationInclusiveScan); - add("scan_exclusive", GroupOperationExclusiveScan); - add("ballot_bit_count", GroupOperationReduce); - add("ballot_inclusive_scan", GroupOperationInclusiveScan); - add("ballot_exclusive_scan", GroupOperationExclusiveScan); - add("non_uniform_reduce", GroupOperationReduce); - add("non_uniform_scan_inclusive", GroupOperationInclusiveScan); - add("non_uniform_scan_exclusive", GroupOperationExclusiveScan); - add("non_uniform_reduce_logical", GroupOperationReduce); - add("non_uniform_scan_inclusive_logical", GroupOperationInclusiveScan); - add("non_uniform_scan_exclusive_logical", GroupOperationExclusiveScan); - add("clustered_reduce", GroupOperationClusteredReduce); -} - -template <> inline void SPIRVMap::init() { - add("rte", FPRoundingModeRTE); - add("rtz", FPRoundingModeRTZ); - add("rtp", FPRoundingModeRTP); - add("rtn", FPRoundingModeRTN); -} - -template <> inline void SPIRVMap::init() { -#define _SPIRV_OP(x) add(OclExt::x, #x); - _SPIRV_OP(cl_images) - _SPIRV_OP(cl_doubles) - _SPIRV_OP(cl_khr_int64_base_atomics) - _SPIRV_OP(cl_khr_int64_extended_atomics) - _SPIRV_OP(cl_khr_fp16) - _SPIRV_OP(cl_khr_gl_sharing) - _SPIRV_OP(cl_khr_gl_event) - _SPIRV_OP(cl_khr_d3d10_sharing) - _SPIRV_OP(cl_khr_media_sharing) - _SPIRV_OP(cl_khr_d3d11_sharing) - _SPIRV_OP(cl_khr_global_int32_base_atomics) - _SPIRV_OP(cl_khr_global_int32_extended_atomics) - _SPIRV_OP(cl_khr_local_int32_base_atomics) - _SPIRV_OP(cl_khr_local_int32_extended_atomics) - _SPIRV_OP(cl_khr_byte_addressable_store) - _SPIRV_OP(cl_khr_3d_image_writes) - _SPIRV_OP(cl_khr_gl_msaa_sharing) - _SPIRV_OP(cl_khr_depth_images) - _SPIRV_OP(cl_khr_gl_depth_images) - _SPIRV_OP(cl_khr_subgroups) - _SPIRV_OP(cl_khr_mipmap_image) - _SPIRV_OP(cl_khr_mipmap_image_writes) - _SPIRV_OP(cl_khr_egl_event) - _SPIRV_OP(cl_khr_srgb_image_writes) -#undef _SPIRV_OP -} - -template <> inline void SPIRVMap::init() { - add(OclExt::cl_images, 
CapabilityImageBasic); - add(OclExt::cl_doubles, CapabilityFloat64); - add(OclExt::cl_khr_int64_base_atomics, CapabilityInt64Atomics); - add(OclExt::cl_khr_int64_extended_atomics, CapabilityInt64Atomics); - add(OclExt::cl_khr_fp16, CapabilityFloat16); - add(OclExt::cl_khr_subgroups, CapabilityGroups); - add(OclExt::cl_khr_mipmap_image, CapabilityImageMipmap); - add(OclExt::cl_khr_mipmap_image_writes, CapabilityImageMipmap); -} - -/// Map OpenCL work functions to SPIR-V builtin variables. -template <> -inline void SPIRVMap::init() { - add("get_work_dim", BuiltInWorkDim); - add("get_global_size", BuiltInGlobalSize); - add("get_global_id", BuiltInGlobalInvocationId); - add("get_global_offset", BuiltInGlobalOffset); - add("get_local_size", BuiltInWorkgroupSize); - add("get_enqueued_local_size", BuiltInEnqueuedWorkgroupSize); - add("get_local_id", BuiltInLocalInvocationId); - add("get_num_groups", BuiltInNumWorkgroups); - add("get_group_id", BuiltInWorkgroupId); - add("get_global_linear_id", BuiltInGlobalLinearId); - add("get_local_linear_id", BuiltInLocalInvocationIndex); - // cl_khr_subgroups - add("get_sub_group_size", BuiltInSubgroupSize); - add("get_max_sub_group_size", BuiltInSubgroupMaxSize); - add("get_num_sub_groups", BuiltInNumSubgroups); - add("get_enqueued_num_sub_groups", BuiltInNumEnqueuedSubgroups); - add("get_sub_group_id", BuiltInSubgroupId); - add("get_sub_group_local_id", BuiltInSubgroupLocalInvocationId); - // cl_khr_subgroup_ballot - add("get_sub_group_eq_mask", BuiltInSubgroupEqMask); - add("get_sub_group_ge_mask", BuiltInSubgroupGeMask); - add("get_sub_group_gt_mask", BuiltInSubgroupGtMask); - add("get_sub_group_le_mask", BuiltInSubgroupLeMask); - add("get_sub_group_lt_mask", BuiltInSubgroupLtMask); -} - -// Maps uniqued OCL builtin function name to SPIR-V op code. -// A uniqued OCL builtin function name may be different from the real -// OCL builtin function name. e.g. 
instead of atomic_min, atomic_umin -// is used for atomic_min with unsigned integer parameter. -// work_group_ and sub_group_ functions are unified as group_ functions -// except work_group_barrier. -class SPIRVInstruction; -template <> inline void SPIRVMap::init() { -#define _SPIRV_OP(x, y) add("atom_" #x, OpAtomic##y); - // cl_khr_int64_base_atomics builtins - _SPIRV_OP(add, IAdd) - _SPIRV_OP(sub, ISub) - _SPIRV_OP(xchg, Exchange) - _SPIRV_OP(dec, IDecrement) - _SPIRV_OP(inc, IIncrement) - _SPIRV_OP(cmpxchg, CompareExchange) - // cl_khr_int64_extended_atomics builtins - _SPIRV_OP(min, SMin) - _SPIRV_OP(max, SMax) - _SPIRV_OP(and, And) - _SPIRV_OP(or, Or) - _SPIRV_OP(xor, Xor) -#undef _SPIRV_OP -#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y); - // CL 2.0 atomic builtins - _SPIRV_OP(flag_test_and_set_explicit, AtomicFlagTestAndSet) - _SPIRV_OP(flag_clear_explicit, AtomicFlagClear) - _SPIRV_OP(load_explicit, AtomicLoad) - _SPIRV_OP(store_explicit, AtomicStore) - _SPIRV_OP(exchange_explicit, AtomicExchange) - _SPIRV_OP(compare_exchange_strong_explicit, AtomicCompareExchange) - _SPIRV_OP(compare_exchange_weak_explicit, AtomicCompareExchangeWeak) - _SPIRV_OP(inc, AtomicIIncrement) - _SPIRV_OP(dec, AtomicIDecrement) - _SPIRV_OP(fetch_add_explicit, AtomicIAdd) - _SPIRV_OP(fetch_sub_explicit, AtomicISub) - _SPIRV_OP(fetch_umin_explicit, AtomicUMin) - _SPIRV_OP(fetch_umax_explicit, AtomicUMax) - _SPIRV_OP(fetch_min_explicit, AtomicSMin) - _SPIRV_OP(fetch_max_explicit, AtomicSMax) - _SPIRV_OP(fetch_and_explicit, AtomicAnd) - _SPIRV_OP(fetch_or_explicit, AtomicOr) - _SPIRV_OP(fetch_xor_explicit, AtomicXor) -#undef _SPIRV_OP -#define _SPIRV_OP(x, y) add(#x, Op##y); - _SPIRV_OP(dot, Dot) - _SPIRV_OP(async_work_group_copy, GroupAsyncCopy) - _SPIRV_OP(async_work_group_strided_copy, GroupAsyncCopy) - _SPIRV_OP(wait_group_events, GroupWaitEvents) - _SPIRV_OP(isequal, FOrdEqual) - _SPIRV_OP(isnotequal, FUnordNotEqual) - _SPIRV_OP(isgreater, FOrdGreaterThan) - 
_SPIRV_OP(isgreaterequal, FOrdGreaterThanEqual) - _SPIRV_OP(isless, FOrdLessThan) - _SPIRV_OP(islessequal, FOrdLessThanEqual) - _SPIRV_OP(islessgreater, LessOrGreater) - _SPIRV_OP(isordered, Ordered) - _SPIRV_OP(isunordered, Unordered) - _SPIRV_OP(isfinite, IsFinite) - _SPIRV_OP(isinf, IsInf) - _SPIRV_OP(isnan, IsNan) - _SPIRV_OP(isnormal, IsNormal) - _SPIRV_OP(signbit, SignBitSet) - _SPIRV_OP(any, Any) - _SPIRV_OP(all, All) - _SPIRV_OP(popcount, BitCount) - _SPIRV_OP(get_fence, GenericPtrMemSemantics) - // CL 2.0 kernel enqueue builtins - _SPIRV_OP(enqueue_marker, EnqueueMarker) - _SPIRV_OP(enqueue_kernel, EnqueueKernel) - _SPIRV_OP(get_kernel_sub_group_count_for_ndrange_impl, - GetKernelNDrangeSubGroupCount) - _SPIRV_OP(get_kernel_max_sub_group_size_for_ndrange_impl, - GetKernelNDrangeMaxSubGroupSize) - _SPIRV_OP(get_kernel_work_group_size_impl, GetKernelWorkGroupSize) - _SPIRV_OP(get_kernel_preferred_work_group_size_multiple_impl, - GetKernelPreferredWorkGroupSizeMultiple) - _SPIRV_OP(retain_event, RetainEvent) - _SPIRV_OP(release_event, ReleaseEvent) - _SPIRV_OP(create_user_event, CreateUserEvent) - _SPIRV_OP(is_valid_event, IsValidEvent) - _SPIRV_OP(set_user_event_status, SetUserEventStatus) - _SPIRV_OP(capture_event_profiling_info, CaptureEventProfilingInfo) - _SPIRV_OP(get_default_queue, GetDefaultQueue) - _SPIRV_OP(ndrange_1D, BuildNDRange) - _SPIRV_OP(ndrange_2D, BuildNDRange) - _SPIRV_OP(ndrange_3D, BuildNDRange) - // Generic Address Space Casts - _SPIRV_OP(to_global, GenericCastToPtrExplicit) - _SPIRV_OP(to_local, GenericCastToPtrExplicit) - _SPIRV_OP(to_private, GenericCastToPtrExplicit) - // CL 2.0 pipe builtins - _SPIRV_OP(read_pipe_2, ReadPipe) - _SPIRV_OP(write_pipe_2, WritePipe) - _SPIRV_OP(read_pipe_2_bl, ReadPipeBlockingINTEL) - _SPIRV_OP(write_pipe_2_bl, WritePipeBlockingINTEL) - _SPIRV_OP(read_pipe_4, ReservedReadPipe) - _SPIRV_OP(write_pipe_4, ReservedWritePipe) - _SPIRV_OP(reserve_read_pipe, ReserveReadPipePackets) - 
_SPIRV_OP(reserve_write_pipe, ReserveWritePipePackets) - _SPIRV_OP(commit_read_pipe, CommitReadPipe) - _SPIRV_OP(commit_write_pipe, CommitWritePipe) - _SPIRV_OP(is_valid_reserve_id, IsValidReserveId) - _SPIRV_OP(group_reserve_read_pipe, GroupReserveReadPipePackets) - _SPIRV_OP(group_reserve_write_pipe, GroupReserveWritePipePackets) - _SPIRV_OP(group_commit_read_pipe, GroupCommitReadPipe) - _SPIRV_OP(group_commit_write_pipe, GroupCommitWritePipe) - _SPIRV_OP(get_pipe_num_packets_ro, GetNumPipePackets) - _SPIRV_OP(get_pipe_num_packets_wo, GetNumPipePackets) - _SPIRV_OP(get_pipe_max_packets_ro, GetMaxPipePackets) - _SPIRV_OP(get_pipe_max_packets_wo, GetMaxPipePackets) - // CL 2.0 workgroup builtins - _SPIRV_OP(group_all, GroupAll) - _SPIRV_OP(group_any, GroupAny) - _SPIRV_OP(group_broadcast, GroupBroadcast) - _SPIRV_OP(group_iadd, GroupIAdd) - _SPIRV_OP(group_fadd, GroupFAdd) - _SPIRV_OP(group_fmin, GroupFMin) - _SPIRV_OP(group_umin, GroupUMin) - _SPIRV_OP(group_smin, GroupSMin) - _SPIRV_OP(group_fmax, GroupFMax) - _SPIRV_OP(group_umax, GroupUMax) - _SPIRV_OP(group_smax, GroupSMax) - // CL image builtins - _SPIRV_OP(SampledImage, SampledImage) - _SPIRV_OP(ImageSampleExplicitLod, ImageSampleExplicitLod) - _SPIRV_OP(read_image, ImageRead) - _SPIRV_OP(write_image, ImageWrite) - _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat) - _SPIRV_OP(get_image_channel_order, ImageQueryOrder) - _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels) - _SPIRV_OP(get_image_num_samples, ImageQuerySamples) - // Intel Subgroups builtins - _SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL) - _SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL) - _SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL) - _SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL) - // Intel media_block_io builtins - _SPIRV_OP(intel_sub_group_media_block_read, SubgroupImageMediaBlockReadINTEL) - _SPIRV_OP(intel_sub_group_media_block_write, - SubgroupImageMediaBlockWriteINTEL) 
- // cl_khr_subgroup_non_uniform_vote - _SPIRV_OP(group_elect, GroupNonUniformElect) - _SPIRV_OP(group_non_uniform_all, GroupNonUniformAll) - _SPIRV_OP(group_non_uniform_any, GroupNonUniformAny) - _SPIRV_OP(group_non_uniform_all_equal, GroupNonUniformAllEqual) - // cl_khr_subgroup_ballot - _SPIRV_OP(group_non_uniform_broadcast, GroupNonUniformBroadcast) - _SPIRV_OP(group_broadcast_first, GroupNonUniformBroadcastFirst) - _SPIRV_OP(group_ballot, GroupNonUniformBallot) - _SPIRV_OP(group_inverse_ballot, GroupNonUniformInverseBallot) - _SPIRV_OP(group_ballot_bit_extract, GroupNonUniformBallotBitExtract) - _SPIRV_OP(group_ballot_bit_count_iadd, GroupNonUniformBallotBitCount) - _SPIRV_OP(group_ballot_find_lsb, GroupNonUniformBallotFindLSB) - _SPIRV_OP(group_ballot_find_msb, GroupNonUniformBallotFindMSB) - // cl_khr_subgroup_non_uniform_arithmetic - _SPIRV_OP(group_non_uniform_iadd, GroupNonUniformIAdd) - _SPIRV_OP(group_non_uniform_fadd, GroupNonUniformFAdd) - _SPIRV_OP(group_non_uniform_imul, GroupNonUniformIMul) - _SPIRV_OP(group_non_uniform_fmul, GroupNonUniformFMul) - _SPIRV_OP(group_non_uniform_smin, GroupNonUniformSMin) - _SPIRV_OP(group_non_uniform_umin, GroupNonUniformUMin) - _SPIRV_OP(group_non_uniform_fmin, GroupNonUniformFMin) - _SPIRV_OP(group_non_uniform_smax, GroupNonUniformSMax) - _SPIRV_OP(group_non_uniform_umax, GroupNonUniformUMax) - _SPIRV_OP(group_non_uniform_fmax, GroupNonUniformFMax) - _SPIRV_OP(group_non_uniform_iand, GroupNonUniformBitwiseAnd) - _SPIRV_OP(group_non_uniform_ior, GroupNonUniformBitwiseOr) - _SPIRV_OP(group_non_uniform_ixor, GroupNonUniformBitwiseXor) - _SPIRV_OP(group_non_uniform_logical_iand, GroupNonUniformLogicalAnd) - _SPIRV_OP(group_non_uniform_logical_ior, GroupNonUniformLogicalOr) - _SPIRV_OP(group_non_uniform_logical_ixor, GroupNonUniformLogicalXor) - // cl_khr_subgroup_shuffle - _SPIRV_OP(group_shuffle, GroupNonUniformShuffle) - _SPIRV_OP(group_shuffle_xor, GroupNonUniformShuffleXor) - // cl_khr_subgroup_shuffle_relative - 
_SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp) - _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown) -#undef _SPIRV_OP -} - -template <> inline void SPIRVMap::init() { -#define _SPIRV_OP(x, y) add(#x, Op##y); - _SPIRV_OP(add, AtomicIAdd) - _SPIRV_OP(sub, AtomicISub) - _SPIRV_OP(xchg, AtomicExchange) - _SPIRV_OP(cmpxchg, AtomicCompareExchange) - _SPIRV_OP(inc, AtomicIIncrement) - _SPIRV_OP(dec, AtomicIDecrement) - _SPIRV_OP(min, AtomicSMin) - _SPIRV_OP(max, AtomicSMax) - _SPIRV_OP(umin, AtomicUMin) - _SPIRV_OP(umax, AtomicUMax) - _SPIRV_OP(and, AtomicAnd) - _SPIRV_OP(or, AtomicOr) - _SPIRV_OP(xor, AtomicXor) -#undef _SPIRV_OP -} - -// SPV_INTEL_device_side_avc_motion_estimation extension builtins class SPIRVSubgroupsAVCIntelInst; -template <> -inline void SPIRVMap::init() { - // Here is a workaround for a bug in the specification: - // 'avc' missed in 'intel_sub_group_avc' prefix. - add("intel_sub_group_ime_ref_window_size", - OpSubgroupAvcImeRefWindowSizeINTEL); - -#define _SPIRV_OP(x, y) add("intel_sub_group_avc_" #x, OpSubgroupAvc##y##INTEL); - // Initialization phase functions - _SPIRV_OP(ime_initialize, ImeInitialize) - _SPIRV_OP(fme_initialize, FmeInitialize) - _SPIRV_OP(bme_initialize, BmeInitialize) - _SPIRV_OP(sic_initialize, SicInitialize) - - // Result and payload types conversion functions - _SPIRV_OP(mce_convert_to_ime_payload, MceConvertToImePayload) - _SPIRV_OP(mce_convert_to_ime_result, MceConvertToImeResult) - _SPIRV_OP(mce_convert_to_ref_payload, MceConvertToRefPayload) - _SPIRV_OP(mce_convert_to_ref_result, MceConvertToRefResult) - _SPIRV_OP(mce_convert_to_sic_payload, MceConvertToSicPayload) - _SPIRV_OP(mce_convert_to_sic_result, MceConvertToSicResult) - _SPIRV_OP(ime_convert_to_mce_payload, ImeConvertToMcePayload) - _SPIRV_OP(ime_convert_to_mce_result, ImeConvertToMceResult) - _SPIRV_OP(ref_convert_to_mce_payload, RefConvertToMcePayload) - _SPIRV_OP(ref_convert_to_mce_result, RefConvertToMceResult) - 
_SPIRV_OP(sic_convert_to_mce_payload, SicConvertToMcePayload) - _SPIRV_OP(sic_convert_to_mce_result, SicConvertToMceResult) -#undef _SPIRV_OP - -// MCE instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_mce_" #x, OpSubgroupAvcMce##y##INTEL); - _SPIRV_OP(get_default_inter_base_multi_reference_penalty, - GetDefaultInterBaseMultiReferencePenalty) - _SPIRV_OP(set_inter_base_multi_reference_penalty, - SetInterBaseMultiReferencePenalty) - _SPIRV_OP(get_default_inter_shape_penalty, GetDefaultInterShapePenalty) - _SPIRV_OP(set_inter_shape_penalty, SetInterShapePenalty) - _SPIRV_OP(get_default_inter_direction_penalty, - GetDefaultInterDirectionPenalty) - _SPIRV_OP(set_inter_direction_penalty, SetInterDirectionPenalty) - _SPIRV_OP(get_default_intra_luma_shape_penalty, - GetDefaultIntraLumaShapePenalty) - _SPIRV_OP(get_default_inter_motion_vector_cost_table, - GetDefaultInterMotionVectorCostTable) - _SPIRV_OP(get_default_high_penalty_cost_table, GetDefaultHighPenaltyCostTable) - _SPIRV_OP(get_default_medium_penalty_cost_table, - GetDefaultMediumPenaltyCostTable) - _SPIRV_OP(get_default_low_penalty_cost_table, GetDefaultLowPenaltyCostTable) - _SPIRV_OP(set_motion_vector_cost_function, SetMotionVectorCostFunction) - _SPIRV_OP(get_default_intra_luma_mode_penalty, GetDefaultIntraLumaModePenalty) - _SPIRV_OP(get_default_non_dc_luma_intra_penalty, - GetDefaultNonDcLumaIntraPenalty) - _SPIRV_OP(get_default_intra_chroma_mode_base_penalty, - GetDefaultIntraChromaModeBasePenalty) - _SPIRV_OP(set_ac_only_haar, SetAcOnlyHaar) - _SPIRV_OP(set_source_interlaced_field_polarity, - SetSourceInterlacedFieldPolarity) - _SPIRV_OP(set_single_reference_interlaced_field_polarity, - SetSingleReferenceInterlacedFieldPolarity) - _SPIRV_OP(set_dual_reference_interlaced_field_polarities, - SetDualReferenceInterlacedFieldPolarities) - _SPIRV_OP(get_motion_vectors, GetMotionVectors) - _SPIRV_OP(get_inter_distortions, GetInterDistortions) - _SPIRV_OP(get_best_inter_distortion, 
GetBestInterDistortions) - _SPIRV_OP(get_inter_major_shape, GetInterMajorShape) - _SPIRV_OP(get_inter_minor_shapes, GetInterMinorShape) - _SPIRV_OP(get_inter_directions, GetInterDirections) - _SPIRV_OP(get_inter_motion_vector_count, GetInterMotionVectorCount) - _SPIRV_OP(get_inter_reference_ids, GetInterReferenceIds) - _SPIRV_OP(get_inter_reference_interlaced_field_polarities, - GetInterReferenceInterlacedFieldPolarities) -#undef _SPIRV_OP - -// IME instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_ime_" #x, OpSubgroupAvcIme##y##INTEL); - _SPIRV_OP(set_single_reference, SetSingleReference) - _SPIRV_OP(set_dual_reference, SetDualReference) - _SPIRV_OP(ref_window_size, RefWindowSize) - _SPIRV_OP(adjust_ref_offset, AdjustRefOffset) - _SPIRV_OP(set_max_motion_vector_count, SetMaxMotionVectorCount) - _SPIRV_OP(set_unidirectional_mix_disable, SetUnidirectionalMixDisable) - _SPIRV_OP(set_early_search_termination_threshold, - SetEarlySearchTerminationThreshold) - _SPIRV_OP(set_weighted_sad, SetWeightedSad) - _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) - _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) - _SPIRV_OP(evaluate_with_single_reference_streamin, - EvaluateWithSingleReferenceStreamin) - _SPIRV_OP(evaluate_with_dual_reference_streamin, - EvaluateWithDualReferenceStreamin) - _SPIRV_OP(evaluate_with_single_reference_streamout, - EvaluateWithSingleReferenceStreamout) - _SPIRV_OP(evaluate_with_dual_reference_streamout, - EvaluateWithDualReferenceStreamout) - _SPIRV_OP(evaluate_with_single_reference_streaminout, - EvaluateWithSingleReferenceStreaminout) - _SPIRV_OP(evaluate_with_dual_reference_streaminout, - EvaluateWithDualReferenceStreaminout) - _SPIRV_OP(get_single_reference_streamin, GetSingleReferenceStreamin) - _SPIRV_OP(get_dual_reference_streamin, GetDualReferenceStreamin) - _SPIRV_OP(strip_single_reference_streamout, StripSingleReferenceStreamout) - _SPIRV_OP(strip_dual_reference_streamout, 
StripDualReferenceStreamout) - _SPIRV_OP(get_border_reached, GetBorderReached) - _SPIRV_OP(get_truncated_search_indication, GetTruncatedSearchIndication) - _SPIRV_OP(get_unidirectional_early_search_termination, - GetUnidirectionalEarlySearchTermination) - _SPIRV_OP(get_weighting_pattern_minimum_motion_vector, - GetWeightingPatternMinimumMotionVector) - _SPIRV_OP(get_weighting_pattern_minimum_distortion, - GetWeightingPatternMinimumDistortion) -#undef _SPIRV_OP - -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_ime_get_streamout_major_shape_" #x, \ - OpSubgroupAvcImeGetStreamout##y##INTEL); - _SPIRV_OP(motion_vectors_single_reference, - SingleReferenceMajorShapeMotionVectors) - _SPIRV_OP(distortions_single_reference, SingleReferenceMajorShapeDistortions) - _SPIRV_OP(reference_ids_single_reference, - SingleReferenceMajorShapeReferenceIds) - _SPIRV_OP(motion_vectors_dual_reference, DualReferenceMajorShapeMotionVectors) - _SPIRV_OP(distortions_dual_reference, DualReferenceMajorShapeDistortions) - _SPIRV_OP(reference_ids_dual_reference, DualReferenceMajorShapeReferenceIds) -#undef _SPIRV_OP - -// REF instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_ref_" #x, OpSubgroupAvcRef##y##INTEL); - _SPIRV_OP(set_bidirectional_mix_disable, SetBidirectionalMixDisable) - _SPIRV_OP(set_bilinear_filter_enable, SetBilinearFilterEnable) - _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) - _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) - _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) - _SPIRV_OP(evaluate_with_multi_reference_interlaced, - EvaluateWithMultiReferenceInterlaced) -#undef _SPIRV_OP - -// SIC instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_sic_" #x, OpSubgroupAvcSic##y##INTEL); - _SPIRV_OP(configure_skc, ConfigureSkc) - _SPIRV_OP(configure_ipe_luma, ConfigureIpeLuma) - _SPIRV_OP(configure_ipe_luma_chroma, ConfigureIpeLumaChroma) - _SPIRV_OP(get_motion_vector_mask, 
GetMotionVectorMask) - _SPIRV_OP(set_intra_luma_shape_penalty, SetIntraLumaShapePenalty) - _SPIRV_OP(set_intra_luma_mode_cost_function, SetIntraLumaModeCostFunction) - _SPIRV_OP(set_intra_chroma_mode_cost_function, SetIntraChromaModeCostFunction) - _SPIRV_OP(set_skc_bilinear_filter_enable, SetBilinearFilterEnable) - _SPIRV_OP(set_skc_forward_transform_enable, SetSkcForwardTransformEnable) - _SPIRV_OP(set_block_based_raw_skip_sad, SetBlockBasedRawSkipSad) - _SPIRV_OP(evaluate_ipe, EvaluateIpe) - _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) - _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) - _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) - _SPIRV_OP(evaluate_with_multi_reference_interlaced, - EvaluateWithMultiReferenceInterlaced) - _SPIRV_OP(get_ipe_luma_shape, GetIpeLumaShape) - _SPIRV_OP(get_best_ipe_luma_distortion, GetBestIpeLumaDistortion) - _SPIRV_OP(get_best_ipe_chroma_distortion, GetBestIpeChromaDistortion) - _SPIRV_OP(get_packed_ipe_luma_modes, GetPackedIpeLumaModes) - _SPIRV_OP(get_ipe_chroma_mode, GetIpeChromaMode) - _SPIRV_OP(get_packed_skc_luma_count_threshold, GetPackedSkcLumaCountThreshold) - _SPIRV_OP(get_packed_skc_luma_sum_threshold, GetPackedSkcLumaSumThreshold) - _SPIRV_OP(get_inter_raw_sads, GetInterRawSads) -#undef _SPIRV_OP -} typedef SPIRVMap OCLSPIRVSubgroupAVCIntelBuiltinMap; -template <> inline void SPIRVMap::init() { - add("opencl.event_t", OpTypeEvent); - add("opencl.pipe_t", OpTypePipe); - add("opencl.clk_event_t", OpTypeDeviceEvent); - add("opencl.reserve_id_t", OpTypeReserveId); - add("opencl.queue_t", OpTypeQueue); - add("opencl.sampler_t", OpTypeSampler); -} - typedef SPIRVMap LLVMSPIRVAtomicRmwOpCodeMap; -template <> inline void LLVMSPIRVAtomicRmwOpCodeMap::init() { - add(llvm::AtomicRMWInst::Xchg, OpAtomicExchange); - add(llvm::AtomicRMWInst::Add, OpAtomicIAdd); - add(llvm::AtomicRMWInst::Sub, OpAtomicISub); - add(llvm::AtomicRMWInst::And, OpAtomicAnd); - 
add(llvm::AtomicRMWInst::Or, OpAtomicOr); - add(llvm::AtomicRMWInst::Xor, OpAtomicXor); - add(llvm::AtomicRMWInst::Max, OpAtomicSMax); - add(llvm::AtomicRMWInst::Min, OpAtomicSMin); - add(llvm::AtomicRMWInst::UMax, OpAtomicUMax); - add(llvm::AtomicRMWInst::UMin, OpAtomicUMin); -} } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index ca90e1bbcb98a..bd6317c9bdfb1 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -3460,6 +3460,10 @@ bool SPIRVToLLVM::transVectorComputeMetadata(SPIRVFunction *BF) { return true; F->addFnAttr(kVCMetadata::VCFunction); + SPIRVWord SIMTMode = 0; + if (BF->hasDecorate(DecorationSIMTCallINTEL, 0, &SIMTMode)) + F->addFnAttr(kVCMetadata::VCSIMTCall, std::to_string(SIMTMode)); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { auto ArgNo = I->getArgNo(); diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 1161ad5c97dc0..347111bb34454 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -92,11 +92,6 @@ using namespace OCLUtil; namespace SPIRV { -cl::opt SPIRVAllowUnknownIntrinsics( - "spirv-allow-unknown-intrinsics", cl::init(false), - cl::desc("Unknown LLVM intrinsics will be translated as external function " - "calls in SPIR-V")); - static void foreachKernelArgMD( MDNode *MD, SPIRVFunction *BF, std::function @@ -313,8 +308,11 @@ SPIRVType *LLVMToSPIRV::transType(Type *T) { // extension if (!BM->isAllowedToUseExtension( ExtensionID::SPV_INTEL_usm_storage_classes) && - ((AddrSpc == SPIRAS_GlobalDevice) || (AddrSpc == SPIRAS_GlobalHost))) - AddrSpc = SPIRAS_Global; + ((AddrSpc == SPIRAS_GlobalDevice) || (AddrSpc == SPIRAS_GlobalHost))) { + auto NewType = + PointerType::get(T->getPointerElementType(), SPIRAS_Global); + return mapType(T, transType(NewType)); + } if (ST && !ST->isSized()) { Op OpCode; StringRef STName = 
ST->getName(); @@ -509,8 +507,8 @@ SPIRVFunction *LLVMToSPIRV::transFunctionDecl(Function *F) { if (auto BF = getTranslatedValue(F)) return static_cast(BF); - if (F->isIntrinsic() && - (!SPIRVAllowUnknownIntrinsics || isKnownIntrinsic(F->getIntrinsicID()))) { + if (F->isIntrinsic() && (!BM->isSPIRVAllowUnknownIntrinsicsEnabled() || + isKnownIntrinsic(F->getIntrinsicID()))) { // We should not translate LLVM intrinsics as a function assert(none_of(F->user_begin(), F->user_end(), [this](User *U) { return getTranslatedValue(U); }) && @@ -587,6 +585,14 @@ void LLVMToSPIRV::transVectorComputeMetadata(Function *F) { else return; + if (Attrs.hasFnAttribute(kVCMetadata::VCSIMTCall)) { + SPIRVWord SIMTMode = 0; + Attrs.getAttribute(AttributeList::FunctionIndex, kVCMetadata::VCSIMTCall) + .getValueAsString() + .getAsInteger(0, SIMTMode); + BF->addDecorate(DecorationSIMTCallINTEL, SIMTMode); + } + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { auto ArgNo = I->getArgNo(); @@ -2131,7 +2137,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, // change is pending the trap/abort intrisinc implementation. return nullptr; default: - if (SPIRVAllowUnknownIntrinsics) + if (BM->isSPIRVAllowUnknownIntrinsicsEnabled()) return BM->addCallInst( transFunctionDecl(II->getCalledFunction()), transArguments(II, BB, diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp index ffd0676045e41..4e211d0567d83 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp @@ -21,23 +21,26 @@ using namespace llvm; PreservedAnalyses SPIRVWriterPass::run(Module &M) { // FIXME: at the moment LLVM/SPIR-V translation errors are ignored. 
std::string Err; - writeSpirv(&M, OS, Err); + writeSpirv(&M, Opts, OS, Err); return PreservedAnalyses::all(); } namespace { class WriteSPIRVPass : public ModulePass { std::ostream &OS; // std::ostream to print on + SPIRV::TranslatorOpts Opts; + public: static char ID; // Pass identification, replacement for typeid - explicit WriteSPIRVPass(std::ostream &O) : ModulePass(ID), OS(O) {} + WriteSPIRVPass(std::ostream &OS, const SPIRV::TranslatorOpts &Opts) + : ModulePass(ID), OS(OS), Opts(Opts) {} StringRef getPassName() const override { return "SPIRV Writer"; } bool runOnModule(Module &M) override { // FIXME: at the moment LLVM/SPIR-V translation errors are ignored. std::string Err; - writeSpirv(&M, OS, Err); + writeSpirv(&M, Opts, OS, Err); return false; } }; @@ -46,5 +49,14 @@ class WriteSPIRVPass : public ModulePass { char WriteSPIRVPass::ID = 0; ModulePass *llvm::createSPIRVWriterPass(std::ostream &Str) { - return new WriteSPIRVPass(Str); + SPIRV::TranslatorOpts DefaultOpts; + // To preserve old behavior of the translator, let's enable all extensions + // by default in this API + DefaultOpts.enableAllExtensions(); + return createSPIRVWriterPass(Str, DefaultOpts); +} + +ModulePass *llvm::createSPIRVWriterPass(std::ostream &Str, + const SPIRV::TranslatorOpts &Opts) { + return new WriteSPIRVPass(Str, Opts); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h index b8005486bbaad..fe80217230be8 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h @@ -15,6 +15,7 @@ #ifndef SPIRV_SPIRVWRITERPASS_H #define SPIRV_SPIRVWRITERPASS_H +#include "LLVMSPIRVOpts.h" #include "llvm/ADT/StringRef.h" namespace llvm { @@ -27,16 +28,27 @@ class PreservedAnalyses; /// manager. ModulePass *createSPIRVWriterPass(std::ostream &Str); +/// \brief Create and return a pass that writes the module to the specified +/// ostream. Note that this pass is designed for use with the legacy pass +/// manager. 
+ModulePass *createSPIRVWriterPass(std::ostream &Str, + const SPIRV::TranslatorOpts &Opts); + /// \brief Pass for writing a module of IR out to a SPIRV file. /// /// Note that this is intended for use with the new pass manager. To construct /// a pass for the legacy pass manager, use the function above. class SPIRVWriterPass { std::ostream &OS; + SPIRV::TranslatorOpts Opts; public: /// \brief Construct a SPIRV writer pass around a particular output stream. - explicit SPIRVWriterPass(std::ostream &OS) : OS(OS) {} + explicit SPIRVWriterPass(std::ostream &OS) : OS(OS) { + Opts.enableAllExtensions(); + } + SPIRVWriterPass(std::ostream &OS, const SPIRV::TranslatorOpts &Opts) + : OS(OS), Opts(Opts) {} /// \brief Run the SPIRV writer pass, and output the module to the selected /// output stream. diff --git a/llvm-spirv/lib/SPIRV/VectorComputeUtil.h b/llvm-spirv/lib/SPIRV/VectorComputeUtil.h index f215b2dc2df3e..772d682a5e42a 100755 --- a/llvm-spirv/lib/SPIRV/VectorComputeUtil.h +++ b/llvm-spirv/lib/SPIRV/VectorComputeUtil.h @@ -116,6 +116,7 @@ const static char VCSLMSize[] = "VCSLMSize"; const static char VCGlobalVariable[] = "VCGlobalVariable"; const static char VCVolatile[] = "VCVolatile"; const static char VCByteOffset[] = "VCByteOffset"; +const static char VCSIMTCall[] = "VCSIMTCall"; const static char VCArgumentKind[] = "VCArgumentKind"; const static char VCArgumentDesc[] = "VCArgumentDesc"; } // namespace kVCMetadata diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h index 57ce0d3eedc0c..078a6ec426376 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -393,6 +393,7 @@ template <> inline void SPIRVMap::init() { {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationFuncParamIOKind, {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationStackCallINTEL, {CapabilityVectorComputeINTEL}); + ADD_VEC_INIT(DecorationSIMTCallINTEL, {CapabilityVectorComputeINTEL}); 
ADD_VEC_INIT(DecorationFuncParamKindINTEL, {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationFuncParamDescINTEL, {CapabilityVectorComputeINTEL}); } diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 4ea503613854f..60a9bcc3de60e 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -538,15 +538,9 @@ class SPIRVStore : public SPIRVInstruction, public SPIRVMemoryAccess { SPIRVInstruction::validate(); if (getSrc()->isForward() || getDst()->isForward()) return; -#ifndef NDEBUG - if (getValueType(PtrId)->getPointerElementType() != getValueType(ValId)) { - assert(getValueType(PtrId) - ->getPointerElementType() - ->getPointerStorageClass() == - getValueType(ValId)->getPointerStorageClass() && - "Inconsistent operand types"); - } -#endif // NDEBUG + assert(getValueType(PtrId)->getPointerElementType() == + getValueType(ValId) && + "Inconsistent operand types"); } private: diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h index a4d23c2d51aa0..2b96e6c298497 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h @@ -431,6 +431,7 @@ inline bool isValid(spv::Decoration V) { case DecorationVectorComputeVariableINTEL: case DecorationGlobalVariableOffsetINTEL: case DecorationFuncParamIOKind: + case DecorationSIMTCallINTEL: return true; default: return false; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h index a4ec3ef3b7140..24620bb97085a 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h @@ -476,6 +476,10 @@ class SPIRVModule { return TranslationOpts.getFPContractMode(); } + bool isSPIRVAllowUnknownIntrinsicsEnabled() const noexcept { + return TranslationOpts.isSPIRVAllowUnknownIntrinsicsEnabled(); + } + // I/O 
functions friend spv_ostream &operator<<(spv_ostream &O, SPIRVModule &M); friend std::istream &operator>>(std::istream &I, SPIRVModule &M); diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 5cff7007899f2..5290bdb9bb5a0 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -371,6 +371,7 @@ template <> inline void SPIRVMap::init() { add(DecorationVectorComputeVariableINTEL, "VectorComputeVariableINTEL"); add(DecorationGlobalVariableOffsetINTEL, "GlobalVariableOffsetINTEL"); add(DecorationFuncParamIOKind, "FuncParamIOKind"); + add(DecorationSIMTCallINTEL, "SIMTCallINTEL"); } SPIRV_DEF_NAMEMAP(Decoration, SPIRVDecorationNameMap) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp b/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp index c6992238452a2..862c45e4f4ae6 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp @@ -477,6 +477,7 @@ enum Decoration { DecorationRestrictPointerEXT = 5355, DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, + DecorationSIMTCallINTEL = 5599, DecorationFuncParamKindINTEL = 9624, DecorationFuncParamDescINTEL = 9625, DecorationReferencedIndirectlyINTEL = 5602, diff --git a/llvm-spirv/test/transcoding/decoration_simt_call.ll b/llvm-spirv/test/transcoding/decoration_simt_call.ll new file mode 100755 index 0000000000000..0bd687ea7897d --- /dev/null +++ b/llvm-spirv/test/transcoding/decoration_simt_call.ll @@ -0,0 +1,35 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_vector_compute --spirv-allow-unknown-intrinsics +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck %s --input-file %t.spt -check-prefix=SPV +; RUN: FileCheck %s --input-file %t.ll -check-prefix=LLVM + +; ModuleID = 'slm.bc' +source_filename = "slm.cpp" +target datalayout = 
"e-p:64:64-i64:64-n8:16:32" +target triple = "spir" + +; LLVM-DAG: @k_rte{{[^#]*}}#[[K_RTE:[0-9]+]] +; LLVM-DAG: attributes #[[K_RTE]]{{.*"VCSIMTCall"="5" }} +; SPV-DAG: EntryPoint 6 [[K_RTE:[0-9]+]] "k_rte" +; SPV-DAG: Decorate [[K_RTE]] SIMTCallINTEL 5 + +@in = internal global <256 x i8> undef, align 256 #0 +declare <256 x i8> @llvm.genx.vload(<256 x i8>* nonnull %aaa) + +; Function Attrs: noinline norecurse nounwind readnone +define dso_local dllexport spir_kernel void @k_rte(i32 %ibuf, i32 %obuf) local_unnamed_addr #1 { +entry: + %gload53 = tail call <256 x i8> @llvm.genx.vload(<256 x i8>* nonnull @in) + ret void +} + +attributes #1 = { noinline norecurse nounwind readnone "VCFunction" "VCSIMTCall"="5" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 8.0.1"} diff --git a/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll b/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll index 457222be9c372..98a40601e6e73 100644 --- a/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll +++ b/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll @@ -23,10 +23,11 @@ ; CHECK-SPIRV: Name [[HOST_ARG2:[0-9]+]] "arg_host.addr" ; CHECK-SPIRV-EXT: TypePointer [[DEVICE_TY:[0-9]+]] 5936 {{[0-9]+}} ; CHECK-SPIRV-EXT: TypePointer [[HOST_TY:[0-9]+]] 5937 {{[0-9]+}} -; CHECK-SPIRV-NO-EXT: TypePointer [[DEVICE_TY:[0-9]+]] 5 {{[0-9]+}} -; CHECK-SPIRV-NO-EXT: TypePointer [[HOST_TY:[0-9]+]] 5 {{[0-9]+}} -; CHECK-SPIRV: Load [[DEVICE_TY]] {{[0-9]+}} [[DEVICE]] {{[0-9]+}} {{[0-9]+}} -; CHECK-SPIRV: Load [[HOST_TY]] {{[0-9]+}} [[HOST]] {{[0-9]+}} 
{{[0-9]+}} +; CHECK-SPIRV-NO-EXT: TypePointer [[GLOB_TY:[0-9]+]] 5 {{[0-9]+}} +; CHECK-SPIRV-EXT: Load [[DEVICE_TY]] {{[0-9]+}} [[DEVICE]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-EXT: Load [[HOST_TY]] {{[0-9]+}} [[HOST]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-NO-EXT: Load [[GLOB_TY]] {{[0-9]+}} [[DEVICE]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-NO-EXT: Load [[GLOB_TY]] {{[0-9]+}} [[HOST]] {{[0-9]+}} {{[0-9]+}} ; ModuleID = 'intel_usm_addrspaces.cpp' source_filename = "intel_usm_addrspaces.cpp" diff --git a/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp b/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp index ce4cd190dda3b..42be0f4f581f2 100644 --- a/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp +++ b/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp @@ -178,6 +178,11 @@ static cl::opt FPCMode( SPIRV::FPContractMode::Fast, "fast", "allow all operations to be contracted for all entry points"))); +cl::opt SPIRVAllowUnknownIntrinsics( + "spirv-allow-unknown-intrinsics", cl::init(false), + cl::desc("Unknown LLVM intrinsics will be translated as external function " + "calls in SPIR-V")); + static std::string removeExt(const std::string &FileName) { size_t Pos = FileName.find_last_of("."); if (Pos != std::string::npos) @@ -553,6 +558,16 @@ int main(int Ac, char **Av) { return -1; } + if (SPIRVAllowUnknownIntrinsics.getNumOccurrences() != 0) { + if (IsReverse) { + errs() + << "Note: --spirv-allow-unknown-intrinsics option ignored as it only " + "affects translation from LLVM IR to SPIR-V"; + } else { + Opts.setSPIRVAllowUnknownIntrinsicsEnabled(SPIRVAllowUnknownIntrinsics); + } + } + #ifdef _SPIRV_SUPPORT_TEXT_FMT if (ToText && (ToBinary || IsReverse || IsRegularization)) { errs() << "Cannot use -to-text with -to-binary, -r, -s\n"; diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index de2887b64c2a9..303b39221d9e6 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -3,11 +3,9 @@ cmake_minimum_required(VERSION 3.4.3) if ("${CMAKE_VERSION}" VERSION_LESS "3.13.4") - message(WARNING - 
"Your CMake version is ${CMAKE_VERSION}. Starting with LLVM 12.0.0, the " - "minimum version of CMake required to build LLVM will become 3.13.4, and " - "using an older CMake will become an error. Please upgrade your CMake to " - "at least 3.13.4 now to avoid issues in the future!") + message(FATAL_ERROR + "Your CMake version is ${CMAKE_VERSION}. The minimum version of CMake " + "required to build LLVM is now 3.13.4.") endif() if(POLICY CMP0068) @@ -24,7 +22,7 @@ if(POLICY CMP0077) endif() if(NOT DEFINED LLVM_VERSION_MAJOR) - set(LLVM_VERSION_MAJOR 11) + set(LLVM_VERSION_MAJOR 12) endif() if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) @@ -832,6 +830,21 @@ configure_file( ${LLVM_INCLUDE_DIR}/llvm/Config/Targets.def ) +# For up-to-date instructions for installing the Tensorflow dependency, refer to +# the bot setup script: https://github.com/google/ml-compiler-opt/blob/master/buildbot/buildbot_init.sh +# In this case, the latest C API library is available for download from +# https://www.tensorflow.org/install/lang_c. +# We will expose the conditional compilation variable, +# LLVM_HAVE_TF_API, through llvm-config.h, so that a user of the LLVM library may +# also leverage the dependency. +set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install") +find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib) + +if (tensorflow_c_api) + set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available") + include_directories(${TENSORFLOW_C_LIB_PATH}/include) +endif() + # Configure the three LLVM configuration header files. 
configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake @@ -972,9 +985,12 @@ set(TENSORFLOW_AOT_PATH "" CACHE PATH "Path to TensorFlow pip install dir") if (NOT TENSORFLOW_AOT_PATH STREQUAL "") set(LLVM_HAVE_TF_AOT "ON" CACHE BOOL "Tensorflow AOT available") - set(TENSORFLOW_AOT_COMPILER - "${TENSORFLOW_AOT_PATH}/../../../../bin/saved_model_cli" - CACHE PATH "Path to the Tensorflow AOT compiler") + set(TENSORFLOW_AOT_COMPILER + "${TENSORFLOW_AOT_PATH}/../../../../bin/saved_model_cli" + CACHE PATH "Path to the Tensorflow AOT compiler") + # Unlike the LLVM_HAVE_TF_API case, we don't need to expose this through + # llvm-config.h, because it's an internal implementation detail. A user of the llvm library that wants to also + # use the TF AOT compiler may do so through their custom build step. add_definitions("-DLLVM_HAVE_TF_AOT") include_directories(${TENSORFLOW_AOT_PATH}/include) add_subdirectory(${TENSORFLOW_AOT_PATH}/xla_aot_runtime_src diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 744c52e82f3b3..333167bfb6b0d 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -891,7 +891,7 @@ endmacro(add_llvm_executable name) # only an object library is built, and no module is built. This is specific to the Polly use case. # # The SUBPROJECT argument contains the LLVM project the plugin belongs -# to. If set, the plugin will link statically by default it if the +# to. If set, the plugin will link statically by default it if the # project was enabled. 
function(add_llvm_pass_plugin name) cmake_parse_arguments(ARG @@ -928,6 +928,9 @@ function(add_llvm_pass_plugin name) if (TARGET omp_gen) add_dependencies(obj.${name} omp_gen) endif() + if (TARGET acc_gen) + add_dependencies(obj.${name} acc_gen) + endif() set_property(GLOBAL APPEND PROPERTY LLVM_STATIC_EXTENSIONS ${name}) elseif(NOT ARG_NO_MODULE) add_llvm_library(${name} MODULE ${ARG_UNPARSED_ARGUMENTS}) diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index c9a1c8131d2d6..e729a839f614d 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -104,15 +104,18 @@ if(NOT TARGET LLVMSupport) @llvm_config_include_buildtree_only_exports@ endif() -# By creating intrinsics_gen and omp_gen here, subprojects that depend on LLVM's -# tablegen-generated headers can always depend on this target whether building -# in-tree with LLVM or not. +# By creating intrinsics_gen, omp_gen and acc_gen here, subprojects that depend +# on LLVM's tablegen-generated headers can always depend on this target whether +# building in-tree with LLVM or not. 
if(NOT TARGET intrinsics_gen) add_custom_target(intrinsics_gen) endif() if(NOT TARGET omp_gen) add_custom_target(omp_gen) endif() +if(NOT TARGET acc_gen) + add_custom_target(acc_gen) +endif() set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On) include(${LLVM_CMAKE_DIR}/LLVM-Config.cmake) diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake index 4eb5fad85634b..706a1ffb5c7b7 100644 --- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake @@ -250,6 +250,7 @@ function(llvm_ExternalProject_Add name source_dir) -DLLVM_HAVE_LINK_VERSION_SCRIPT=${LLVM_HAVE_LINK_VERSION_SCRIPT} -DLLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO=${LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO} -DLLVM_USE_RELATIVE_PATHS_IN_FILES=${LLVM_USE_RELATIVE_PATHS_IN_FILES} + -DLLVM_LIT_ARGS=${LLVM_LIT_ARGS} -DLLVM_SOURCE_PREFIX=${LLVM_SOURCE_PREFIX} -DPACKAGE_VERSION=${PACKAGE_VERSION} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.rst index 99fb6af02a282..a9884cd9f3b5a 100644 --- a/llvm/docs/CodingStandards.rst +++ b/llvm/docs/CodingStandards.rst @@ -1575,27 +1575,28 @@ faraway places in the file to tell that the function is local. Don't Use Braces on Simple Single-Statement Bodies of if/else/loop Statements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When writing the body of an ``if``, ``else``, or loop statement, omit the braces to -avoid unnecessary line noise. However, braces should be used in cases where the -omission of braces harm the readability and maintainability of the code. - -Readability is harmed when a single statement is accompanied by a comment that loses -its meaning if hoisted above the ``if`` or loop statement. Similarly, braces should -be used when single-statement body is complex enough that it becomes difficult to see -where the block containing the following statement began. 
An ``if``/``else`` chain or -a loop is considered a single statement for this rule, and this rule applies recursively. -This list is not exhaustive, for example, readability is also harmed if an -``if``/``else`` chain starts using braced bodies partway through and does not continue -on with braced bodies. +When writing the body of an ``if``, ``else``, or loop statement, we prefer to +omit the braces to avoid unnecessary line noise. However, braces should be used +in cases where the omission of braces harm the readability and maintainability +of the code. + +We consider that readability is harmed when omitting the brace in the presence +of a single statement that is accompanied by a comment (assuming the comment +can't be hoisted above the ``if`` or loop statement, see below). +Similarly, braces should be used when a single-statement body is complex enough +that it becomes difficult to see where the block containing the following +statement began. An ``if``/``else`` chain or a loop is considered a single +statement for this rule, and this rule applies recursively. -Maintainability is harmed if the body of an ``if`` ends with a (directly or indirectly) -nested ``if`` statement with no ``else``. Braces on the outer ``if`` would help to avoid -running into a "dangling else" situation. +This list is not exhaustive, for example, readability is also harmed if an +``if``/``else`` chain does not use braced bodies for either all or none of its +members, with complex conditionals, deep nesting, etc. The examples below +intend to provide some guidelines. +Maintainability is harmed if the body of an ``if`` ends with a (directly or +indirectly) nested ``if`` statement with no ``else``. Braces on the outer ``if`` +would help to avoid running into a "dangling else" situation. -Note that comments should only be hoisted for loops and -``if``, and not in ``else if`` or ``else``, where it would be unclear whether the comment -belonged to the preceeding condition, or the ``else``. .. 
code-block:: c++ @@ -1604,20 +1605,67 @@ belonged to the preceeding condition, or the ``else``. handleFunctionDecl(D); else if (isa(D)) handleVarDecl(D); - else { + + + // Here we document the condition itself and not the body. + if (isa(D)) { + // It is necessary that we explain the situation with this surprisingly long + // comment, so it would be unclear without the braces whether the following + // statement is in the scope of the `if`. + // Because the condition is documented, we can't really hoist this + // comment that applies to the body above the if. + handleOtherDecl(D); + } + + // Use braces on the outer `if` to avoid a potential dangling else situation. + if (isa(D)) { + for (auto *A : D.attrs()) + if (shouldProcessAttr(A)) + handleAttr(A); + } + + // Use braces for the `if` block to keep it uniform with the else block. + if (isa(D)) { + handleFunctionDecl(D); + } else { // In this else case, it is necessary that we explain the situation with this // surprisingly long comment, so it would be unclear without the braces whether - // the following statement is in the scope of the else. + // the following statement is in the scope of the `if`. handleOtherDecl(D); } - // This should also omit braces. The for loop contains only a single statement, - // so it shouldn't have braces. The if also only contains a single statement (the - // for loop), so it also should omit braces. + // This should also omit braces. The `for` loop contains only a single statement, + // so it shouldn't have braces. The `if` also only contains a single simple + // statement (the for loop), so it also should omit braces. if (isa(D)) for (auto *A : D.attrs()) handleAttr(A); + // Use braces for the outer `if` since the nested `for` is braced. + if (isa(D)) { + for (auto *A : D.attrs()) { + // In this for loop body, it is necessary that we explain the situation + // with this surprisingly long comment, forcing braces on the `for` block. 
+ handleAttr(A); + } + } + + // Use braces on the outer block because there are more than two levels of nesting. + if (isa(D)) { + for (auto *A : D.attrs()) + for (ssize_t i : llvm::seq(count)) + handleAttrOnDecl(D, A, i); + } + + // Use braces on the outer block because of a nested `if`, otherwise the + // compiler would warn: `add explicit braces to avoid dangling else` + if (auto *D = dyn_cast(D)) { + if (shouldProcess(D)) + handleVarDecl(D); + else + markAsIgnored(D); + } + See Also ======== diff --git a/llvm/docs/CommandGuide/index.rst b/llvm/docs/CommandGuide/index.rst index 77fece030eb60..d13c2a0fc0a9b 100644 --- a/llvm/docs/CommandGuide/index.rst +++ b/llvm/docs/CommandGuide/index.rst @@ -14,25 +14,26 @@ Basic Commands .. toctree:: :maxdepth: 1 - llvm-as - llvm-dis - opt + dsymutil llc lli - llvm-link - llvm-lib - llvm-lipo + llvm-as llvm-config + llvm-cov llvm-cxxmap llvm-diff - llvm-cov - llvm-profdata - llvm-stress - llvm-symbolizer + llvm-dis llvm-dwarfdump - dsymutil + llvm-lib + llvm-libtool-darwin + llvm-link + llvm-lipo llvm-mca + llvm-profdata llvm-readobj + llvm-stress + llvm-symbolizer + opt GNU binutils replacements ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/llvm/docs/CommandGuide/llvm-libtool-darwin.rst b/llvm/docs/CommandGuide/llvm-libtool-darwin.rst new file mode 100644 index 0000000000000..0baacfd88e8a5 --- /dev/null +++ b/llvm/docs/CommandGuide/llvm-libtool-darwin.rst @@ -0,0 +1,59 @@ +llvm-libtool-darwin - LLVM tool for creating libraries for Darwin +================================================================= + +.. program:: llvm-libtool-darwin + +SYNOPSIS +-------- + +:program:`llvm-libtool-darwin` [*options*] ** + +DESCRIPTION +----------- + +:program:`llvm-libtool-darwin` is a tool for creating static and dynamic +libraries for Darwin. + +For most scenarios, it works as a drop-in replacement for cctools' +:program:`libtool`. + +OPTIONS +-------- +:program:`llvm-libtool-darwin` supports the following options: + +.. 
option:: -h, -help + + Show help and usage for this command. + +.. option:: -help-list + + Show help and usage for this command without grouping the options + into categories. + +.. option:: -color + + Use colors in output. + +.. option:: -version + + Display the version of this program. + +.. option:: -o + + Specify the output file name. Must be specified exactly once. + +EXIT STATUS +----------- + +:program:`llvm-libtool-darwin` exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0. + +BUGS +---- + +To report bugs, please visit . + +SEE ALSO +-------- + +:manpage:`llvm-ar(1)` diff --git a/llvm/docs/ExtendingLLVM.rst b/llvm/docs/ExtendingLLVM.rst index 53daf5c8427a0..f2eb374b4ef94 100644 --- a/llvm/docs/ExtendingLLVM.rst +++ b/llvm/docs/ExtendingLLVM.rst @@ -147,7 +147,7 @@ complicated behavior in a single node (rotate). code you wrote in ``LegalizeOp`` above to decompose your new node into other legal nodes for this target. -#. ``lib/Target/TargetSelectionDAG.td``: +#. ``include/llvm/Target/TargetSelectionDAG.td``: Most current targets supported by LLVM generate code using the DAGToDAG method, where SelectionDAG nodes are pattern matched to target-specific diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index c2d6200e67fa8..86d8c62af2b7e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2778,7 +2778,9 @@ floating-point transformations. ``contract`` Allow floating-point contraction (e.g. fusing a multiply followed by an - addition into a fused multiply-and-add). + addition into a fused multiply-and-add). This does not enable reassociating + to form arbitrary contractions. For example, ``(a*b) + (c*d) + e`` can not + be transformed into ``(a*b) + ((c*d) + e)`` to create two fma operations. ``afn`` Approximate functions - Allow substitution of approximate calculations for @@ -3521,6 +3523,9 @@ uses with" concept would not hold. 
To ensure all uses of a given register observe the same value (even if '``undef``'), the :ref:`freeze instruction ` can be used. A value is frozen if its uses see the same value. +An aggregate value or vector is frozen if its elements are frozen. +The padding of an aggregate isn't considered, since it isn't visible +without storing it into memory and loading it with a different type. .. code-block:: llvm @@ -5716,33 +5721,34 @@ attribute on parameters and return values. It is sometimes useful to attach information to loop constructs. Currently, loop metadata is implemented as metadata attached to the branch instruction -in the loop latch block. This type of metadata refer to a metadata node that is -guaranteed to be separate for each loop. The loop identifier metadata is -specified with the name ``llvm.loop``. - -The loop identifier metadata is implemented using a metadata that refers to -itself to avoid merging it with any other identifier metadata, e.g., -during module linkage or function inlining. That is, each loop should refer -to their own identification metadata even if they reside in separate functions. -The following example contains loop identifier metadata for two separate loop -constructs: +in the loop latch block. The loop metadata node is a list of +other metadata nodes, each representing a property of the loop. Usually, +the first item of the property node is a string. For example, the +``llvm.loop.unroll.count`` suggests an unroll factor to the loop +unroller: .. code-block:: llvm - !0 = !{!0} - !1 = !{!1} + br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !0 + ... + !0 = !{!0, !1, !2} + !1 = !{!"llvm.loop.unroll.enable"} + !2 = !{!"llvm.loop.unroll.count", i32 4} -The loop identifier metadata can be used to specify additional -per-loop metadata. Any operands after the first operand can be treated -as user-defined metadata. 
For example the ``llvm.loop.unroll.count`` -suggests an unroll factor to the loop unroller: +For legacy reasons, the first item of a loop metadata node must be a +reference to itself. Before the advent of the 'distinct' keyword, this +forced the preservation of otherwise identical metadata nodes. Since +the loop-metadata node can be attached to multiple nodes, the 'distinct' +keyword has become unnecessary. -.. code-block:: llvm +Prior to the property nodes, one or two ``DILocation`` (debug location) +nodes can be present in the list. The first, if present, identifies the +source-code location where the loop begins. The second, if present, +identifies the source-code location where the loop ends. - br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !0 - ... - !0 = !{!0, !1} - !1 = !{!"llvm.loop.unroll.count", i32 4} +Loop metadata nodes cannot be used as unique identifiers. They are +neither persistent for the same loop through transformations nor +necessarily unique to just one loop. '``llvm.loop.disable_nonforced``' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10679,6 +10685,9 @@ instructions may yield different values. While ``undef`` and ``poison`` pointers can be frozen, the result is a non-dereferenceable pointer. See the :ref:`Pointer Aliasing Rules ` section for more information. +If an aggregate value or vector is frozen, the operand is frozen element-wise. +The padding of an aggregate isn't considered, since it isn't visible +without storing it into memory and loading it with a different type. Example: @@ -11640,9 +11649,11 @@ the escaped allocas are allocated, which would break attempts to use '``llvm.localrecover``'. .. _int_read_register: +.. _int_read_volatile_register: .. 
_int_write_register: -'``llvm.read_register``' and '``llvm.write_register``' Intrinsics +'``llvm.read_register``', '``llvm.read_volatile_register``', and +'``llvm.write_register``' Intrinsics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: @@ -11652,6 +11663,8 @@ Syntax: declare i32 @llvm.read_register.i32(metadata) declare i64 @llvm.read_register.i64(metadata) + declare i32 @llvm.read_volatile_register.i32(metadata) + declare i64 @llvm.read_volatile_register.i64(metadata) declare void @llvm.write_register.i32(metadata, i32 @value) declare void @llvm.write_register.i64(metadata, i64 @value) !0 = !{!"sp\00"} @@ -11659,17 +11672,21 @@ Syntax: Overview: """"""""" -The '``llvm.read_register``' and '``llvm.write_register``' intrinsics -provides access to the named register. The register must be valid on -the architecture being compiled to. The type needs to be compatible -with the register being read. +The '``llvm.read_register``', '``llvm.read_volatile_register``', and +'``llvm.write_register``' intrinsics provide access to the named register. +The register must be valid on the architecture being compiled to. The type +needs to be compatible with the register being read. Semantics: """""""""" -The '``llvm.read_register``' intrinsic returns the current value of the -register, where possible. The '``llvm.write_register``' intrinsic sets -the current value of the register, where possible. +The '``llvm.read_register``' and '``llvm.read_volatile_register``' intrinsics +return the current value of the register, where possible. The +'``llvm.write_register``' intrinsic sets the current value of the register, +where possible. + +A call to '``llvm.read_volatile_register``' is assumed to have side-effects +and possibly return a different value each time (e.g. for a timer register). 
This is useful to implement named register global variables that need to always be mapped to a specific register, as is common practice on @@ -15522,6 +15539,7 @@ The argument to this intrinsic must be a vector of floating-point values. Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15546,17 +15564,20 @@ Matrix Intrinsics ----------------- Operations on matrixes requiring shape information (like number of rows/columns -or the memory layout) can be expressed using the matrix intrinsics. Matrixes are -embedded in a flat vector and the intrinsics take the dimensions as arguments. -Currently column-major layout is assumed. The intrinsics support both integer -and floating point matrixes. +or the memory layout) can be expressed using the matrix intrinsics. These +intrinsics require matrix dimensions to be passed as immediate arguments, and +matrixes are passed and returned as vectors. This means that for a ``R`` x +``C`` matrix, element ``i`` of column ``j`` is at index ``j * R + i`` in the +corresponding vector, with indices starting at 0. Currently column-major layout +is assumed. The intrinsics support both integer and floating point matrixes. '``llvm.matrix.transpose.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15565,21 +15586,24 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.transpose.*``' intrinsic treats %In as containing a matrix -with rows and columns and returns the transposed matrix embedded in -the result vector. +The '``llvm.matrix.transpose.*``' intrinsics treat ``%In`` as a `` x +`` matrix and return the transposed matrix in the result vector. Arguments: """""""""" -The and arguments must be constant integers. The vector argument -%In and the returned vector must have * elements. +The first argument ``%In`` is a vector that corresponds to a `` x +`` matrix. 
Thus, arguments ```` and ```` correspond to the +number of rows and columns, respectively, and must be positive, constant +integers. The returned vector must have `` * `` elements, and have +the same float or integer element type as ``%In``. '``llvm.matrix.multiply.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15588,18 +15612,20 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.multiply.*``' intrinsic treats %A as a matrix with -rows and columns, %B as a matrix with rows and -columns and multiplies them. The result matrix is returned embedded in the -result vector. +The '``llvm.matrix.multiply.*``' intrinsics treat ``%A`` as a `` x +`` matrix, ``%B`` as a `` x `` matrix, and +multiplies them. The result matrix is returned in the result vector. Arguments: """""""""" -The , and arguments must be constant -integers. The vector argument %A must have * elements, %B -must have * elements and the returned vector must have - * elements. +The first vector argument ``%A`` corresponds to a matrix with `` * +`` elements, and the second argument ``%B`` to a matrix with +`` * `` elements. Arguments ````, +```` and ```` must be positive, constant integers. The +returned vector must have `` * `` elements. +Vectors ``%A``, ``%B``, and the returned vector all have the same float or +integer element type. '``llvm.matrix.column.major.load.*``' Intrinsic @@ -15607,6 +15633,7 @@ must have * elements and the returned vector must have Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15616,25 +15643,29 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.column.major.load.*``' intrinsic loads a matrix with -rows and columns, using a stride of %Stride between columns. For two -consecutive columns A and B, %Stride refers to the distance (the number of -elements) between the start of column A and the start of column B. 
The result -matrix is returned embedded in the result vector. This allows for convenient -loading of sub matrixes. If is true, the intrinsic is considered -a :ref:`volatile memory access `. - -If the %Ptr argument is known to be aligned to some boundary, this can be +The '``llvm.matrix.column.major.load.*``' intrinsics load a `` x `` +matrix using a stride of ``%Stride`` to compute the start address of the +different columns. This allows for convenient loading of sub matrixes. If +```` is true, the intrinsic is considered a :ref:`volatile memory +access `. The result matrix is returned in the result vector. If the +``%Ptr`` argument is known to be aligned to some boundary, this can be specified as an attribute on the argument. Arguments: """""""""" -The , and arguments must be constant integers. The -returned vector must have * elements. %Stride must be >= . +The first argument ``%Ptr`` is a pointer type to the returned vector type, and +corresponds to the start address to load from. The second argument ``%Stride`` +is a positive, constant integer with ``%Stride >= ``. ``%Stride`` is used +to compute the column memory addresses. I.e., for a column ``C``, its start +memory address is calculated with ``%Ptr + C * %Stride``. The third argument +```` is a boolean value. The fourth and fifth arguments, +```` and ````, correspond to the number of rows and columns, +respectively, and must be positive, constant integers. The returned vector must +have `` * `` elements. -The :ref:`align ` parameter attribute can be provided -for the %Ptr arguments. +The :ref:`align ` parameter attribute can be provided for the +``%Ptr`` arguments. '``llvm.matrix.column.major.store.*``' Intrinsic @@ -15651,24 +15682,29 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.column.major.store.*``' intrinsic stores the matrix with - rows and columns embedded in %In, using a stride of %Stride -between columns.
For two consecutive columns A and B, %Stride refers to the -distance (the number of elements) between the start of column A and the start -of column B. If is true, the intrinsic is considered a +The '``llvm.matrix.column.major.store.*``' intrinsics store the `` x +`` matrix in ``%In`` to memory using a stride of ``%Stride`` between +columns. If ```` is true, the intrinsic is considered a :ref:`volatile memory access `. -If the %Ptr argument is known to be aligned to some boundary, this can be +If the ``%Ptr`` argument is known to be aligned to some boundary, this can be specified as an attribute on the argument. Arguments: """""""""" -The , , arguments must be constant integers. The -vector argument %In must have * elements. %Stride must be >= . +The first argument ``%In`` is a vector that corresponds to a `` x +`` matrix to be stored to memory. The second argument ``%Ptr`` is a +pointer to the vector type of ``%In``, and is the start address of the matrix +in memory. The third argument ``%Stride`` is a positive, constant integer with +``%Stride >= ``. ``%Stride`` is used to compute the column memory +addresses. I.e., for a column ``C``, its start memory address is calculated +with ``%Ptr + C * %Stride``. The fourth argument ```` is a boolean +value. The arguments ```` and ```` correspond to the number of rows +and columns, respectively, and must be positive, constant integers. The :ref:`align ` parameter attribute can be provided -for the %Ptr arguments. +for the ``%Ptr`` arguments. Half Precision Floating-Point Intrinsics diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 8d8da954ece39..e234965aaa7c0 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -1,12 +1,12 @@ ========================= -LLVM 11.0.0 Release Notes +LLVM 12.0.0 Release Notes ========================= .. contents:: :local: .. warning:: - These are in-progress notes for the upcoming LLVM 11 release.
+ These are in-progress notes for the upcoming LLVM 12 release. Release notes for previous releases can be found on `the Download Page `_. @@ -15,7 +15,7 @@ Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, -release 11.0.0. Here we describe the status of LLVM, including major improvements +release 12.0.0. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded from the `LLVM releases web site `_. @@ -57,22 +57,7 @@ Non-comprehensive list of changes in this release Changes to the LLVM IR ---------------------- -* The callsite attribute `vector-function-abi-variant - `_ has been - added to describe the mapping between scalar functions and vector - functions, to enable vectorization of call sites. The information - provided by the attribute is interfaced via the API provided by the - ``VFDatabase`` class. - -* `dereferenceable` attributes and metadata on pointers no longer imply - anything about the alignment of the pointer in question. Previously, some - optimizations would make assumptions based on the type of the pointer. This - behavior was undocumented. To preserve optimizations, frontends may need to - be updated to generate appropriate `align` attributes and metadata. - -* The DIModule metadata is extended to contain file and line number - information. This information is used to represent Fortran modules debug - info at IR level. +* ... Changes to building LLVM ------------------------ @@ -82,21 +67,6 @@ Changes to the ARM Backend During this release ... -* Implemented C-language intrinsics for the full Arm v8.1-M MVE instruction - set. ```` now supports the complete API defined in the Arm C - Language Extensions. - -* Added support for assembly for the optional Custom Datapath Extension (CDE) - for Arm M-profile targets. 
- -* Implemented C-language intrinsics ```` for the CDE instruction set. - -* Clang now defaults to ``-fomit-frame-pointer`` when targeting non-Android - Linux for arm and thumb when optimizations are enabled. Users that were - previously not specifying a value and relying on the implicit compiler - default may wish to specify ``-fno-omit-frame-pointer`` to get the old - behavior. This improves compatibility with GCC. - Changes to the MIPS Target -------------------------- @@ -113,45 +83,20 @@ Changes to the X86 Target During this release ... - -* Functions with the probe-stack attribute set to "inline-asm" are now protected - against stack clash without the need of a third-party probing function and - with limited impact on performance. -* -x86-enable-old-knl-abi command line switch has been removed. v32i16/v64i8 - vectors are always passed in ZMM register when avx512f is enabled and avx512bw - is disabled. -* Vectors larger than 512 bits with i16 or i8 elements will be passed in - multiple ZMM registers when avx512f is enabled. Previously this required - avx512bw otherwise they would split into multiple YMM registers. This means - vXi16/vXi8 vectors are consistently treated the same as - vXi32/vXi64/vXf64/vXf32 vectors of the same total width. - Changes to the AMDGPU Target ----------------------------- -* The backend default denormal handling mode has been switched to on - for all targets for all compute function types. Frontends wishing to - retain the old behavior should explicitly request f32 denormal - flushing. +During this release ... Changes to the AVR Target ----------------------------- -* Moved from an experimental backend to an official backend. AVR support is now - included by default in all LLVM builds and releases and is available under - the "avr-unknown-unknown" target triple. +During this release ... 
Changes to the WebAssembly Target --------------------------------- -* Programs which don't have a "main" function, called "reactors" are now - properly supported, with a new `-mexec-model=reactor` flag. Programs which - previously used `-Wl,--no-entry` to avoid having a main function should - switch to this new flag, so that static initialization is properly - performed. - -* `__attribute__((visibility("protected")))` now evokes a warning, as - WebAssembly does not support "protected" visibility. +During this release ... Changes to the OCaml bindings ----------------------------- @@ -173,28 +118,17 @@ Changes to the DAG infrastructure Changes to the Debug Info --------------------------------- -* LLVM now supports the debug entry values (DW_OP_entry_value) production for - the x86, ARM, and AArch64 targets by default. Other targets can use - the utility by using the experimental option ("-debug-entry-values"). - This is a debug info feature that allows debuggers to recover the value of - optimized-out parameters by going up a stack frame and interpreting the values - passed to the callee. The feature improves the debugging user experience when - debugging optimized code. +During this release ... Changes to the LLVM tools --------------------------------- -* Added an option (--show-section-sizes) to llvm-dwarfdump to show the sizes - of all debug sections within a file. - -* llvm-nm now implements the flag ``--special-syms`` and will filter out special - symbols, i.e. mapping symbols on ARM and AArch64, by default. This matches - the GNU nm behavior. +During this release ... Changes to LLDB -=============== +--------------------------------- -External Open Source Projects Using LLVM 11 +External Open Source Projects Using LLVM 12 =========================================== * A project... 
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index c8ee65f132f99..2e937f0006272 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -47,6 +47,9 @@ Unit tests Unit tests are written using `Google Test `_ and `Google Mock `_ and are located in the ``llvm/unittests`` directory. +In general unit tests are reserved for targeting the support library and other +generic data structure, we prefer relying on regression tests for testing +transformations and analysis on the IR. Regression tests ---------------- @@ -62,6 +65,17 @@ enough code to reproduce the problem should be written and placed somewhere underneath this directory. For example, it can be a small piece of LLVM IR distilled from an actual application or benchmark. +Testing Analysis +---------------- + +An analysis is a pass that infer properties on some part of the IR and not +transforming it. They are tested in general using the same infrastructure as the +regression tests, by creating a separate "Printer" pass to consume the analysis +result and print it on the standard output in a textual format suitable for +FileCheck. +See `llvm/test/Analysis/BranchProbabilityInfo/loop.ll `_ +for an example of such test. + ``test-suite`` -------------- @@ -151,7 +165,7 @@ script which is built as part of LLVM. For example, to run the .. code-block:: bash - % llvm-lit ~/llvm/test/Integer/BitPacked.ll + % llvm-lit ~/llvm/test/Integer/BitPacked.ll or to run all of the ARM CodeGen tests: @@ -184,7 +198,7 @@ Writing new regression tests ---------------------------- The regression test structure is very simple, but does require some -information to be set. This information is gathered via ``configure`` +information to be set. This information is gathered via ``cmake`` and is written to a file, ``test/lit.site.cfg`` in the build directory. The ``llvm/test`` Makefile does this work for you. @@ -426,7 +440,7 @@ will be a failure if its execution succeeds. 
``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated list of boolean expressions. The values in each expression may be: -- Features added to ``config.available_features`` by +- Features added to ``config.available_features`` by configuration files such as ``lit.cfg``. - Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only). @@ -491,7 +505,7 @@ RUN lines: character with a ``/``. This is useful to normalize path separators. Example: ``%s: C:\Desktop Files/foo_test.s.tmp`` - + Example: ``%/s: C:/Desktop Files/foo_test.s.tmp`` ``%:s, %:S, %:t, %:T:`` diff --git a/llvm/examples/ExceptionDemo/ExceptionDemo.cpp b/llvm/examples/ExceptionDemo/ExceptionDemo.cpp index 0ecb527f4ec05..1b3ec7c91ddee 100644 --- a/llvm/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/llvm/examples/ExceptionDemo/ExceptionDemo.cpp @@ -792,7 +792,7 @@ _Unwind_Reason_Code ourPersonality(int version, _Unwind_Action actions, } #endif - const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context); + const uint8_t *lsda = (const uint8_t *)_Unwind_GetLanguageSpecificData(context); #ifdef DEBUG fprintf(stderr, @@ -1959,11 +1959,13 @@ int main(int argc, char *argv[]) { executionEngine->finalizeObject(); +#ifndef NDEBUG fprintf(stderr, "\nBegin module dump:\n\n"); module->dump(); fprintf(stderr, "\nEnd module dump:\n"); +#endif fprintf(stderr, "\n\nBegin Test:\n"); diff --git a/llvm/examples/OrcV2Examples/CMakeLists.txt b/llvm/examples/OrcV2Examples/CMakeLists.txt index 2c737296ae10f..0f1be0e35f239 100644 --- a/llvm/examples/OrcV2Examples/CMakeLists.txt +++ b/llvm/examples/OrcV2Examples/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(LLJITWithInitializers) add_subdirectory(LLJITWithLazyReexports) add_subdirectory(LLJITWithObjectCache) add_subdirectory(LLJITWithObjectLinkingLayerPlugin) +add_subdirectory(LLJITWithTargetProcessControl) add_subdirectory(OrcV2CBindingsAddObjectFile) add_subdirectory(OrcV2CBindingsBasicUsage) 
add_subdirectory(OrcV2CBindingsReflectProcessSymbols) diff --git a/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/CMakeLists.txt b/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/CMakeLists.txt new file mode 100644 index 0000000000000..10e0de8d64bda --- /dev/null +++ b/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + Core + ExecutionEngine + IRReader + OrcJIT + Support + nativecodegen + ) + +add_llvm_example(LLJITWithTargetProcessControl + LLJITWithTargetProcessControl.cpp + ) diff --git a/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp b/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp new file mode 100644 index 0000000000000..a88360e3a8cd6 --- /dev/null +++ b/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp @@ -0,0 +1,178 @@ +//===--- LLJITWithLazyReexports.cpp - LLJIT example with custom laziness --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// In this example we will use the lazy re-exports utility to lazily compile +// IR modules. We will do this in seven steps: +// +// 1. Create an LLJIT instance. +// 2. Install a transform so that we can see what is being compiled. +// 3. Create an indirect stubs manager and lazy call-through manager. +// 4. Add two modules that will be conditionally compiled, plus a main module. +// 5. Add lazy-rexports of the symbols in the conditionally compiled modules. +// 6. Dump the ExecutionSession state to see the symbol table prior to +// executing any code. +// 7. Verify that only modules containing executed code are compiled. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" +#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +#include "../ExampleModules.h" + +#include + +using namespace llvm; +using namespace llvm::orc; + +ExitOnError ExitOnErr; + +// Example IR modules. +// +// Note that in the conditionally compiled modules, FooMod and BarMod, functions +// have been given an _body suffix. This is to ensure that their names do not +// clash with their lazy-reexports. +// For clients who do not wish to rename function bodies (e.g. because they want +// to re-use cached objects between static and JIT compiles) techniques exist to +// avoid renaming. See the lazy-reexports section of the ORCv2 design doc. 
+ +const llvm::StringRef FooMod = + R"( + define i32 @foo_body() { + entry: + ret i32 1 + } +)"; + +const llvm::StringRef BarMod = + R"( + define i32 @bar_body() { + entry: + ret i32 2 + } +)"; + +const llvm::StringRef MainMod = + R"( + + define i32 @entry(i32 %argc) { + entry: + %and = and i32 %argc, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then + + if.then: ; preds = %entry + %call = tail call i32 @foo() #2 + br label %return + + if.end: ; preds = %entry + %call1 = tail call i32 @bar() #2 + br label %return + + return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.end ] + ret i32 %retval.0 + } + + declare i32 @foo() + declare i32 @bar() +)"; + +static void *reenter(void *Ctx, void *TrampolineAddr) { + std::promise LandingAddressP; + auto LandingAddressF = LandingAddressP.get_future(); + + auto *TPCIU = static_cast(Ctx); + TPCIU->getLazyCallThroughManager().resolveTrampolineLandingAddress( + pointerToJITTargetAddress(TrampolineAddr), + [&](JITTargetAddress LandingAddress) { + LandingAddressP.set_value( + jitTargetAddressToPointer(LandingAddress)); + }); + return LandingAddressF.get(); +} + +cl::list InputArgv(cl::Positional, + cl::desc("...")); + +int main(int argc, char *argv[]) { + // Initialize LLVM. + InitLLVM X(argc, argv); + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + + cl::ParseCommandLineOptions(argc, argv, "LLJITWithLazyReexports"); + ExitOnErr.setBanner(std::string(argv[0]) + ": "); + + // (1) Create LLJIT instance. 
+ auto J = ExitOnErr(LLJITBuilder().create()); + + // (2) Install transform to print modules as they are compiled: + J->getIRTransformLayer().setTransform( + [](ThreadSafeModule TSM, + const MaterializationResponsibility &R) -> Expected { + TSM.withModuleDo([](Module &M) { dbgs() << "---Compiling---\n" << M; }); + return std::move(TSM); // Not a redundant move: fix build on gcc-7.5 + }); + + // (3) Create stubs and call-through managers: + + auto TPC = ExitOnErr(SelfTargetProcessControl::Create()); + auto TPCIU = ExitOnErr(TPCIndirectionUtils::Create(*TPC)); + ExitOnErr(TPCIU->writeResolverBlock(pointerToJITTargetAddress(&reenter), + pointerToJITTargetAddress(TPCIU.get()))); + TPCIU->createLazyCallThroughManager(J->getExecutionSession(), 0); + auto ISM = TPCIU->createIndirectStubsManager(); + + // (4) Add modules. + ExitOnErr(J->addIRModule(ExitOnErr(parseExampleModule(FooMod, "foo-mod")))); + ExitOnErr(J->addIRModule(ExitOnErr(parseExampleModule(BarMod, "bar-mod")))); + ExitOnErr(J->addIRModule(ExitOnErr(parseExampleModule(MainMod, "main-mod")))); + + // (5) Add lazy reexports. + MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout()); + SymbolAliasMap ReExports( + {{Mangle("foo"), + {Mangle("foo_body"), + JITSymbolFlags::Exported | JITSymbolFlags::Callable}}, + {Mangle("bar"), + {Mangle("bar_body"), + JITSymbolFlags::Exported | JITSymbolFlags::Callable}}}); + ExitOnErr(J->getMainJITDylib().define( + lazyReexports(TPCIU->getLazyCallThroughManager(), *ISM, + J->getMainJITDylib(), std::move(ReExports)))); + + // (6) Dump the ExecutionSession state. + dbgs() << "---Session state---\n"; + J->getExecutionSession().dump(dbgs()); + dbgs() << "\n"; + + // (7) Execute the JIT'd main function and pass the example's command line + // arguments unmodified. This should cause either ExampleMod1 or ExampleMod2 + // to be compiled, and either "1" or "2" returned depending on the number of + // arguments passed. 
+ + // Look up the JIT'd function, cast it to a function pointer, then call it. + auto EntrySym = ExitOnErr(J->lookup("entry")); + auto *Entry = (int (*)(int))EntrySym.getAddress(); + + int Result = Entry(argc); + outs() << "---Result---\n" + << "entry(" << argc << ") = " << Result << "\n"; + + return 0; +} diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index c88d9651d68da..f7df648d27ed6 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -2286,7 +2286,7 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting /// from Src into IntVal, which is assumed to be wide enough and to hold zero. -void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes); +void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes); } // namespace llvm diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h index 07edc3d8e4ec7..dca217358ff8e 100644 --- a/llvm/include/llvm/ADT/DenseSet.h +++ b/llvm/include/llvm/ADT/DenseSet.h @@ -173,6 +173,11 @@ class DenseSetImpl { return ConstIterator(TheMap.find(V)); } + /// Check if the set contains the given element. + bool contains(const_arg_type_t V) const { + return TheMap.find(V) != TheMap.end(); + } + /// Alternative version of find() which allows a different, and possibly less /// expensive, key type. /// The DenseMapInfo is responsible for supplying methods diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index 9ee310c879fd3..cb53b7fa74692 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -52,6 +52,7 @@ #include #include #include +#include #include namespace llvm { @@ -112,6 +113,10 @@ template hash_code hash_value(const T *ptr); template hash_code hash_value(const std::pair &arg); +/// Compute a hash_code for a tuple. 
+template +hash_code hash_value(const std::tuple &arg); + /// Compute a hash_code for a standard string. template hash_code hash_value(const std::basic_string &arg); @@ -645,6 +650,26 @@ hash_code hash_value(const std::pair &arg) { return hash_combine(arg.first, arg.second); } +// Implementation details for the hash_value overload for std::tuple<...>(...). +namespace hashing { +namespace detail { + +template +hash_code hash_value_tuple_helper(const std::tuple &arg, + std::index_sequence indices) { + return hash_combine(std::get(arg)...); +} + +} // namespace detail +} // namespace hashing + +template +hash_code hash_value(const std::tuple &arg) { + // TODO: Use std::apply when LLVM starts using C++17. + return ::llvm::hashing::detail::hash_value_tuple_helper( + arg, typename std::index_sequence_for()); +} + // Declared and documented above, but defined here so that any of the hashing // infrastructure is available. template diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 50b688b366489..eed676bb74e14 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1480,9 +1480,9 @@ auto size(R &&Range, /// Provide wrappers to std::for_each which take ranges instead of having to /// pass begin/end explicitly. -template -UnaryPredicate for_each(R &&Range, UnaryPredicate P) { - return std::for_each(adl_begin(Range), adl_end(Range), P); +template +UnaryFunction for_each(R &&Range, UnaryFunction F) { + return std::for_each(adl_begin(Range), adl_end(Range), F); } /// Provide wrappers to std::all_of which take ranges instead of having to pass @@ -1577,9 +1577,9 @@ auto count_if(R &&Range, UnaryPredicate P) { /// Wrapper function around std::transform to apply a function to a range and /// store the result elsewhere. 
-template -OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P) { - return std::transform(adl_begin(Range), adl_end(Range), d_first, P); +template +OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F) { + return std::transform(adl_begin(Range), adl_end(Range), d_first, F); } /// Provide wrappers to std::partition which take ranges instead of having to diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h index 901d5b1bcd90f..32bcd50966cca 100644 --- a/llvm/include/llvm/ADT/SetVector.h +++ b/llvm/include/llvm/ADT/SetVector.h @@ -205,6 +205,11 @@ class SetVector { return true; } + /// Check if the SetVector contains the given key. + bool contains(const key_type &key) const { + return set_.find(key) != set_.end(); + } + /// Count the number of elements of a given key in the SetVector. /// \returns 0 if the element is not in the SetVector, 1 if it is. size_type count(const key_type &key) const { @@ -263,6 +268,11 @@ class SetVector { remove(*SI); } + void swap(SetVector &RHS) { + set_.swap(RHS.set_); + vector_.swap(RHS.vector_); + } + private: /// A wrapper predicate designed for use with std::remove_if. /// @@ -308,4 +318,22 @@ class SmallSetVector } // end namespace llvm +namespace std { + +/// Implement std::swap in terms of SetVector swap. +template +inline void +swap(llvm::SetVector &LHS, llvm::SetVector &RHS) { + LHS.swap(RHS); +} + +/// Implement std::swap in terms of SmallSetVector swap. 
+template +inline void +swap(llvm::SmallSetVector &LHS, llvm::SmallSetVector &RHS) { + LHS.swap(RHS); +} + +} // end namespace std + #endif // LLVM_ADT_SETVECTOR_H diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 0ab05cfe611aa..57dd8f6b695d7 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -378,6 +378,9 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase { iterator find(ConstPtrType Ptr) const { return makeIterator(find_imp(ConstPtrTraits::getAsVoidPointer(Ptr))); } + bool contains(ConstPtrType Ptr) const { + return find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)) != EndPointer(); + } template void insert(IterT I, IterT E) { diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index a03fa7dd84235..0600e528ee692 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -232,6 +232,13 @@ class SmallSet { return {Set.end()}; } + /// Check if the SmallSet contains the given element. + bool contains(const T &V) const { + if (isSmall()) + return vfind(V) != Vector.end(); + return Set.find(V) != Set.end(); + } + private: bool isSmall() const { return Set.empty(); } diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index 74457d5fd679a..d8acf1ee2f3a2 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -229,12 +229,15 @@ class SparseSet { return const_cast(this)->findIndex(KeyIndexOf(Key)); } + /// Check if the set contains the given \c Key. + /// + /// @param Key A valid key to find. + bool contains(const KeyT &Key) const { return find(Key) == end() ? 0 : 1; } + /// count - Returns 1 if this set contains an element identified by Key, /// 0 otherwise. /// - size_type count(const KeyT &Key) const { - return find(Key) == end() ? 0 : 1; - } + size_type count(const KeyT &Key) const { return contains(Key) ? 1 : 0; } /// insert - Attempts to insert a new element. 
/// diff --git a/llvm/include/llvm/ADT/StringSet.h b/llvm/include/llvm/ADT/StringSet.h index 63d929399a4ec..c4245175544b4 100644 --- a/llvm/include/llvm/ADT/StringSet.h +++ b/llvm/include/llvm/ADT/StringSet.h @@ -45,6 +45,9 @@ class StringSet : public StringMap { insert(const StringMapEntry &mapEntry) { return insert(mapEntry.getKey()); } + + /// Check if the set contains the given \c key. + bool contains(StringRef key) const { return Base::FindKey(key) != -1; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index eb0d3ae8fedfc..e70af71b3da67 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -355,6 +355,8 @@ class ModuleToPostOrderCGSCCPassAdaptor /// Runs the CGSCC pass across every SCC in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } + private: CGSCCPassT Pass; }; @@ -543,6 +545,8 @@ class CGSCCToFunctionPassAdaptor return PA; } + static bool isRequired() { return true; } + private: FunctionPassT Pass; }; diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h new file mode 100644 index 0000000000000..5fc919a6dc56f --- /dev/null +++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h @@ -0,0 +1,45 @@ +//===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H +#define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Function; + +class TFModelEvaluator; +class InlineSizeEstimatorAnalysis + : public AnalysisInfoMixin { +public: + InlineSizeEstimatorAnalysis(); + InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&); + ~InlineSizeEstimatorAnalysis(); + + static AnalysisKey Key; + using Result = Optional; + Result run(const Function &F, FunctionAnalysisManager &FAM); + static bool isEvaluatorRequested(); + +private: + std::unique_ptr Evaluator; +}; + +class InlineSizeEstimatorAnalysisPrinterPass + : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit InlineSizeEstimatorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm +#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h index 0391f2cdd9136..46bc974c4a7f7 100644 --- a/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h +++ b/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h @@ -110,7 +110,7 @@ class ImplicitControlFlowTracking : public InstructionPrecedenceTracking { return isPreceededBySpecialInstruction(Insn); } - virtual bool isSpecialInstruction(const Instruction *Insn) const; + bool isSpecialInstruction(const Instruction *Insn) const override; }; class MemoryWriteTracking : public InstructionPrecedenceTracking { @@ -133,7 +133,7 @@ class MemoryWriteTracking : public InstructionPrecedenceTracking { return isPreceededBySpecialInstruction(Insn); } - virtual bool isSpecialInstruction(const Instruction *Insn) const; + bool 
isSpecialInstruction(const Instruction *Insn) const override; }; } // llvm diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index c169416f73f90..a3b7bee97808c 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -111,17 +111,15 @@ class SimpleLoopSafetyInfo: public LoopSafetyInfo { bool HeaderMayThrow = false; // Same as previous, but specific to loop header public: - virtual bool blockMayThrow(const BasicBlock *BB) const; + bool blockMayThrow(const BasicBlock *BB) const override; - virtual bool anyBlockMayThrow() const; + bool anyBlockMayThrow() const override; - virtual void computeLoopSafetyInfo(const Loop *CurLoop); + void computeLoopSafetyInfo(const Loop *CurLoop) override; - virtual bool isGuaranteedToExecute(const Instruction &Inst, - const DominatorTree *DT, - const Loop *CurLoop) const; - - virtual ~SimpleLoopSafetyInfo() {}; + bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const override; }; /// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to @@ -138,15 +136,15 @@ class ICFLoopSafetyInfo: public LoopSafetyInfo { mutable MemoryWriteTracking MW; public: - virtual bool blockMayThrow(const BasicBlock *BB) const; + bool blockMayThrow(const BasicBlock *BB) const override; - virtual bool anyBlockMayThrow() const; + bool anyBlockMayThrow() const override; - virtual void computeLoopSafetyInfo(const Loop *CurLoop); + void computeLoopSafetyInfo(const Loop *CurLoop) override; - virtual bool isGuaranteedToExecute(const Instruction &Inst, - const DominatorTree *DT, - const Loop *CurLoop) const; + bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const override; /// Returns true if we could not execute a memory-modifying instruction before /// we enter \p BB under assumption that \p CurLoop is entered. 
@@ -167,8 +165,6 @@ class ICFLoopSafetyInfo: public LoopSafetyInfo { /// from its block. It will make all cache updates to keep it correct after /// this removal. void removeInstruction(const Instruction *Inst); - - virtual ~ICFLoopSafetyInfo() {}; }; bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI); diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index cad1c52f7f879..945d41c376779 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -146,7 +146,7 @@ inline bool IsPotentialRetainableObjPtr(const Value *Op) { return false; // Special arguments can not be a valid retainable object pointer. if (const Argument *Arg = dyn_cast(Op)) - if (Arg->hasPassPointeeByValueAttr() || Arg->hasNestAttr() || + if (Arg->hasPassPointeeByValueCopyAttr() || Arg->hasNestAttr() || Arg->hasStructRetAttr()) return false; // Only consider values with pointer types. 
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 3864d4955104b..36b39f4a0e231 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -262,6 +262,12 @@ TLI_DEFINE_STRING_INTERNAL("__atanhf_finite") /// long double __atanhl_finite(long double x); TLI_DEFINE_ENUM_INTERNAL(atanhl_finite) TLI_DEFINE_STRING_INTERNAL("__atanhl_finite") +/// void __atomic_load(size_t size, void *mptr, void *vptr, int smodel); +TLI_DEFINE_ENUM_INTERNAL(atomic_load) +TLI_DEFINE_STRING_INTERNAL("__atomic_load") +/// void __atomic_store(size_t size, void *mptr, void *vptr, int smodel); +TLI_DEFINE_ENUM_INTERNAL(atomic_store) +TLI_DEFINE_STRING_INTERNAL("__atomic_store") /// double __cosh_finite(double x); TLI_DEFINE_ENUM_INTERNAL(cosh_finite) TLI_DEFINE_STRING_INTERNAL("__cosh_finite") diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h new file mode 100644 index 0000000000000..2ab2c7a57d941 --- /dev/null +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -0,0 +1,115 @@ +//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H +#define LLVM_ANALYSIS_UTILS_TFUTILS_H + +#include "llvm/Config/config.h" + +#ifdef LLVM_HAVE_TF_API +#include "llvm/IR/LLVMContext.h" + +#include +#include + +namespace llvm { + +/// Load a SavedModel, find the given inputs and outputs, and setup storage +/// for input tensors. The user is responsible for correctly dimensioning the +/// input tensors and setting their values before calling evaluate(). 
+/// To initialize: +/// - construct the object +/// - initialize the input tensors using initInput. Indices must correspond to +/// indices in the InputNames used at construction. +/// To use: +/// - set input values by using getInput to get each input tensor, and then +/// setting internal scalars, for all dimensions (tensors are row-major: +/// https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205) +/// - call evaluate. The input tensors' values are not consumed after this, and +/// may still be read. +/// - use the outputs in the output vector +class TFModelEvaluatorImpl; +class EvaluationResultImpl; + +class TFModelEvaluator final { +public: + /// The result of a model evaluation. Handles the lifetime of the output + /// tensors, which means that their values need to be used before + /// the EvaluationResult's dtor is called. + class EvaluationResult { + public: + EvaluationResult(const EvaluationResult &) = delete; + EvaluationResult(EvaluationResult &&Other); + ~EvaluationResult(); + + /// Get a pointer to the first element of the tensor at Index. + template T *getTensorValue(size_t Index) { + return static_cast(getUntypedTensorValue(Index)); + } + + private: + friend class TFModelEvaluator; + EvaluationResult(std::unique_ptr Impl); + void *getUntypedTensorValue(size_t Index); + std::unique_ptr Impl; + }; + + TFModelEvaluator(StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags = "serve"); + ~TFModelEvaluator(); + TFModelEvaluator(const TFModelEvaluator &) = delete; + TFModelEvaluator(TFModelEvaluator &&) = delete; + + /// Evaluate the model, assuming it is valid. Returns None if the evaluation + /// fails or the model is invalid, or an EvaluationResult otherwise. The + /// inputs are assumed to have been already provided via getInput(). When + /// returning None, it also invalidates this object. + Optional evaluate(); + + /// Provides access to the input vector. 
+ template T *getInput(size_t Index) { + return static_cast(getUntypedInput(Index)); + } + + /// Returns true if the tensorflow model was loaded successfully, false + /// otherwise. + bool isValid() const { return !!Impl; } + + /// Initialize the input at Index as a tensor of the given type and + /// dimensions. + template + void initInput(size_t Index, const std::vector &Dimensions) { + return initInput(Index, getModelTypeIndex(), Dimensions); + } + +private: + void *getUntypedInput(size_t Index); + template int getModelTypeIndex(); + void initInput(size_t Index, int TypeIndex, + const std::vector &Dimensions); + + std::unique_ptr Impl; +}; + +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); +template <> int TFModelEvaluator::getModelTypeIndex(); + +} // namespace llvm + +#endif // LLVM_HAVE_TF_API +#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 9510739ef5ab4..178f61563cd7f 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -21,6 +21,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include #include @@ -591,18 +592,25 @@ class Value; /// the parent of I. bool programUndefinedIfPoison(const Instruction *PoisonI); - /// Return true if I can create poison from non-poison operands. 
- /// For vectors, canCreatePoison returns true if there is potential poison in - /// any element of the result when vectors without poison are given as + /// canCreateUndefOrPoison returns true if Op can create undef or poison from + /// non-undef & non-poison operands. + /// For vectors, canCreateUndefOrPoison returns true if there is potential + /// poison or undef in any element of the result when vectors without + /// undef/poison poison are given as operands. + /// For example, given `Op = shl <2 x i32> %x, <0, 32>`, this function returns + /// true. If Op raises immediate UB but never creates poison or undef + /// (e.g. sdiv I, 0), canCreatePoison returns false. + /// + /// canCreatePoison returns true if Op can create poison from non-poison /// operands. - /// For example, given `I = shl <2 x i32> %x, <0, 32>`, this function returns - /// true. If I raises immediate UB but never creates poison (e.g. sdiv I, 0), - /// canCreatePoison returns false. - bool canCreatePoison(const Instruction *I); + bool canCreateUndefOrPoison(const Operator *Op); + bool canCreatePoison(const Operator *Op); /// Return true if this function can prove that V is never undef value /// or poison value. - // + /// Note that this is different from canCreateUndefOrPoison because the + /// function assumes Op's operands are not poison/undef. + /// /// If CtxI and DT are specified this method performs flow-sensitive analysis /// and returns true if it is guaranteed to be never undef or poison /// immediately before the CtxI. diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 9acb1fcf11029..b1d7850442fba 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -224,6 +224,9 @@ class VFDatabase { /// a vector Function ABI. 
static void getVFABIMappings(const CallInst &CI, SmallVectorImpl &Mappings) { + if (!CI.getCalledFunction()) + return; + const StringRef ScalarName = CI.getCalledFunction()->getName(); SmallVector ListOfStrings; diff --git a/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/llvm/include/llvm/BinaryFormat/WasmRelocs.def index 05c5147e63144..b6ea2c59b4bb3 100644 --- a/llvm/include/llvm/BinaryFormat/WasmRelocs.def +++ b/llvm/include/llvm/BinaryFormat/WasmRelocs.def @@ -20,3 +20,5 @@ WASM_RELOC(R_WASM_MEMORY_ADDR_LEB64, 14) WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB64, 15) WASM_RELOC(R_WASM_MEMORY_ADDR_I64, 16) WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB64, 17) +WASM_RELOC(R_WASM_TABLE_INDEX_SLEB64, 18) +WASM_RELOC(R_WASM_TABLE_INDEX_I64, 19) diff --git a/llvm/include/llvm/CMakeLists.txt b/llvm/include/llvm/CMakeLists.txt index 7cf8699aa21e4..b46319f24fc8e 100644 --- a/llvm/include/llvm/CMakeLists.txt +++ b/llvm/include/llvm/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(IR) add_subdirectory(Support) -add_subdirectory(Frontend/OpenMP) +add_subdirectory(Frontend) # If we're doing an out-of-tree build, copy a module map for generated # header files into the build area. diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index f9d32eadd23e2..0ea9a70f07f58 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -658,7 +658,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (auto *VTy = dyn_cast(Ty)) { unsigned Num = cast(VTy)->getNumElements(); unsigned Cost = thisT()->getArithmeticInstrCost( - Opcode, VTy->getScalarType(), CostKind); + Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args, CxtI); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
return getScalarizationOverhead(VTy, Args) + Num * Cost; @@ -702,8 +703,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { std::pair SrcLT = TLI->getTypeLegalizationCost(DL, Src); std::pair DstLT = TLI->getTypeLegalizationCost(DL, Dst); - unsigned SrcSize = SrcLT.second.getSizeInBits(); - unsigned DstSize = DstLT.second.getSizeInBits(); + TypeSize SrcSize = SrcLT.second.getSizeInBits(); + TypeSize DstSize = DstLT.second.getSizeInBits(); bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy(); bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy(); @@ -777,8 +778,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Check vector-to-vector casts. if (DstVTy && SrcVTy) { // If the cast is between same-sized registers, then the check is simple. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + if (SrcLT.first == DstLT.first && SrcSize == DstSize) { // Assume that Zext is done using AND. if (Opcode == Instruction::ZExt) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 43a8cb2a1d51c..c317b7ed4c54b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -107,6 +107,9 @@ class CombinerHelper { bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); + bool matchSextAlreadyExtended(MachineInstr &MI); + bool applySextAlreadyExtended(MachineInstr &MI); + bool matchElideBrByInvertingCond(MachineInstr &MI); void applyElideBrByInvertingCond(MachineInstr &MI); bool tryElideBrByInvertingCond(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 0fe1d60b630db..058aacf38634d 100644 --- 
a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -163,6 +163,8 @@ class LegalizerHelper { widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); + LegalizeResult widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). The generic diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index efb0fa85a0fef..815f779e4f426 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -274,9 +274,10 @@ class LiveVariables : public MachineFunctionPass { void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock, MachineBasicBlock *BB); - void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock, + void MarkVirtRegAliveInBlock(VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *BB, - std::vector &WorkList); + SmallVectorImpl &WorkList); + void HandleVirtRegDef(unsigned reg, MachineInstr &MI); void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index b69f6584fe6c2..d6cb7211cf70e 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -143,6 +143,10 @@ class MachineBasicBlock /// branch. bool AddressTaken = false; + /// Indicate that this basic block needs its symbol be emitted regardless of + /// whether the flow just falls-through to it. + bool LabelMustBeEmitted = false; + /// Indicate that this basic block is the entry block of an EH scope, i.e., /// the block that used to have a catchpad or cleanuppad instruction in the /// LLVM IR. 
@@ -202,6 +206,13 @@ class MachineBasicBlock /// branch. void setHasAddressTaken() { AddressTaken = true; } + /// Test whether this block must have its label emitted. + bool hasLabelMustBeEmitted() const { return LabelMustBeEmitted; } + + /// Set this block to reflect that, regardless how we flow to it, we need + /// its label be emitted. + void setLabelMustBeEmitted() { LabelMustBeEmitted = true; } + /// Return the MachineFunction containing this basic block. const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 970d6d7db3345..2c912b177384b 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -280,6 +280,9 @@ class MachineInstr const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } + /// Move the instruction before \p MovePos. + void moveBefore(MachineInstr *MovePos); + /// Return the function that contains the basic block that this instruction /// belongs to. /// diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h index e33ee226e41a5..ce4eb85d64525 100644 --- a/llvm/include/llvm/CodeGen/StackMaps.h +++ b/llvm/include/llvm/CodeGen/StackMaps.h @@ -166,21 +166,23 @@ class StatepointOpers { enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 }; public: - explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {} + explicit StatepointOpers(const MachineInstr *MI) : MI(MI) { + NumDefs = MI->getNumDefs(); + } /// Get index of statepoint ID operand. - unsigned getIDPos() const { return IDPos; } + unsigned getIDPos() const { return NumDefs + IDPos; } /// Get index of Num Patch Bytes operand. - unsigned getNBytesPos() const { return NBytesPos; } + unsigned getNBytesPos() const { return NumDefs + NBytesPos; } /// Get index of Num Call Arguments operand. 
- unsigned getNCallArgsPos() const { return NCallArgsPos; } + unsigned getNCallArgsPos() const { return NumDefs + NCallArgsPos; } /// Get starting index of non call related arguments /// (calling convention, statepoint flags, vm state and gc state). unsigned getVarIdx() const { - return MI->getOperand(NCallArgsPos).getImm() + MetaEnd; + return MI->getOperand(NumDefs + NCallArgsPos).getImm() + MetaEnd + NumDefs; } /// Get index of Calling Convention operand. @@ -195,16 +197,16 @@ class StatepointOpers { } /// Return the ID for the given statepoint. - uint64_t getID() const { return MI->getOperand(IDPos).getImm(); } + uint64_t getID() const { return MI->getOperand(NumDefs + IDPos).getImm(); } /// Return the number of patchable bytes the given statepoint should emit. uint32_t getNumPatchBytes() const { - return MI->getOperand(NBytesPos).getImm(); + return MI->getOperand(NumDefs + NBytesPos).getImm(); } /// Return the target of the underlying call. const MachineOperand &getCallTarget() const { - return MI->getOperand(CallTargetPos); + return MI->getOperand(NumDefs + CallTargetPos); } /// Return the calling convention. @@ -217,6 +219,7 @@ class StatepointOpers { private: const MachineInstr *MI; + unsigned NumDefs; }; class StackMaps { diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index cd4a682deeb7f..c3a11b1996759 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -202,6 +202,17 @@ class TargetFrameLowering { virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; + /// With basic block sections, emit callee saved frame moves for basic blocks + /// that are in a different section. 
+ virtual void + emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const {} + + virtual void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + bool IsPrologue) const {} + /// Replace a StackProbe stub (if any) with the actual probe code inline virtual void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {} diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 475c93efd653d..82b682ddb3dc5 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -79,4 +79,7 @@ */ #cmakedefine01 LLVM_FORCE_ENABLE_STATS +/* Define if LLVM was built with a dependency to the libtensorflow dynamic library */ +#cmakedefine LLVM_HAVE_TF_API + #endif diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h index 1aff2624990f7..edfa68d49a60d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -126,6 +126,10 @@ class DWARFExpression { return Op; } + iterator skipBytes(uint64_t Add) { + return iterator(Expr, Op.EndOffset + Add); + } + // Comparison operators are provided out of line. 
friend bool operator==(const iterator &, const iterator &); }; diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index ec40eec5a05ee..62e0f4765a69a 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -313,8 +313,8 @@ struct PrimitiveTypeNode : public TypeNode { explicit PrimitiveTypeNode(PrimitiveKind K) : TypeNode(NodeKind::PrimitiveType), PrimKind(K) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const; - void outputPost(OutputStream &OS, OutputFlags Flags) const {} + void outputPre(OutputStream &OS, OutputFlags Flags) const override; + void outputPost(OutputStream &OS, OutputFlags Flags) const override {} PrimitiveKind PrimKind; }; @@ -474,8 +474,8 @@ struct PointerTypeNode : public TypeNode { struct TagTypeNode : public TypeNode { explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const; - void outputPost(OutputStream &OS, OutputFlags Flags) const; + void outputPre(OutputStream &OS, OutputFlags Flags) const override; + void outputPost(OutputStream &OS, OutputFlags Flags) const override; QualifiedNameNode *QualifiedName = nullptr; TagKind Tag; @@ -484,8 +484,8 @@ struct TagTypeNode : public TypeNode { struct ArrayTypeNode : public TypeNode { ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const; - void outputPost(OutputStream &OS, OutputFlags Flags) const; + void outputPre(OutputStream &OS, OutputFlags Flags) const override; + void outputPost(OutputStream &OS, OutputFlags Flags) const override; void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const; void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h index 
7860088f35692..1e1e282a89972 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h @@ -46,6 +46,8 @@ enum ELFX86RelocationKind : Edge::Kind { /// jit-link the given object buffer, which must be a ELF x86-64 object file. void jitLink_ELF_x86_64(std::unique_ptr Ctx); +/// Return the string name of the given ELF x86-64 edge kind. +StringRef getELFX86RelocationKindName(Edge::Kind R); } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index ac5a593bb77ba..0c8514a60a507 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -17,7 +17,10 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" + #include +#include namespace llvm { namespace jitlink { @@ -74,6 +77,15 @@ class JITLinkMemoryManager { /// working memory. virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0; + /// Calls finalizeAsync and waits for completion. + Error finalize() { + std::promise FinalizeResultP; + auto FinalizeResultF = FinalizeResultP.get_future(); + finalizeAsync( + [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); }); + return FinalizeResultF.get(); + } + /// Should deallocate target memory. 
virtual Error deallocate() = 0; }; diff --git a/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/llvm/include/llvm/ExecutionEngine/JITSymbol.h index 6f0030a18f475..9bbdd21f77de1 100644 --- a/llvm/include/llvm/ExecutionEngine/JITSymbol.h +++ b/llvm/include/llvm/ExecutionEngine/JITSymbol.h @@ -429,7 +429,7 @@ class LegacyJITSymbolResolver : public JITSymbolResolver { virtual JITSymbol findSymbol(const std::string &Name) = 0; private: - virtual void anchor(); + void anchor() override; }; } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index b3e2bddd716bb..e0cfd8bf24099 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Process.h" @@ -139,8 +140,10 @@ template class LocalTrampolinePool : public TrampolinePool { return; } - ORCABI::writeResolverCode(static_cast(ResolverBlock.base()), - &reenter, this); + ORCABI::writeResolverCode(static_cast(ResolverBlock.base()), + pointerToJITTargetAddress(ResolverBlock.base()), + pointerToJITTargetAddress(&reenter), + pointerToJITTargetAddress(this)); EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(), sys::Memory::MF_READ | @@ -166,14 +169,14 @@ template class LocalTrampolinePool : public TrampolinePool { (sys::Process::getPageSizeEstimate() - ORCABI::PointerSize) / ORCABI::TrampolineSize; - uint8_t *TrampolineMem = static_cast(TrampolineBlock.base()); - ORCABI::writeTrampolines(TrampolineMem, ResolverBlock.base(), - NumTrampolines); + char *TrampolineMem = static_cast(TrampolineBlock.base()); + ORCABI::writeTrampolines( + TrampolineMem, 
pointerToJITTargetAddress(TrampolineMem), + pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); for (unsigned I = 0; I < NumTrampolines; ++I) - this->AvailableTrampolines.push_back( - static_cast(reinterpret_cast( - TrampolineMem + (I * ORCABI::TrampolineSize)))); + this->AvailableTrampolines.push_back(pointerToJITTargetAddress( + TrampolineMem + (I * ORCABI::TrampolineSize))); if (auto EC = sys::Memory::protectMappedMemory( TrampolineBlock.getMemoryBlock(), @@ -302,6 +305,61 @@ class IndirectStubsManager { virtual void anchor(); }; +template class LocalIndirectStubsInfo { +public: + LocalIndirectStubsInfo(unsigned NumStubs, sys::OwningMemoryBlock StubsMem) + : NumStubs(NumStubs), StubsMem(std::move(StubsMem)) {} + + static Expected create(unsigned MinStubs, + unsigned PageSize) { + auto ISAS = getIndirectStubsBlockSizes(MinStubs, PageSize); + + assert((ISAS.StubBytes % PageSize == 0) && + "StubBytes is not a page size multiple"); + uint64_t PointerAlloc = alignTo(ISAS.PointerBytes, PageSize); + + // Allocate memory for stubs and pointers in one call. 
+ std::error_code EC; + auto StubsAndPtrsMem = + sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + ISAS.StubBytes + PointerAlloc, nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + if (EC) + return errorCodeToError(EC); + + sys::MemoryBlock StubsBlock(StubsAndPtrsMem.base(), ISAS.StubBytes); + auto StubsBlockMem = static_cast(StubsAndPtrsMem.base()); + auto PtrBlockAddress = + pointerToJITTargetAddress(StubsBlockMem) + ISAS.StubBytes; + + ORCABI::writeIndirectStubsBlock(StubsBlockMem, + pointerToJITTargetAddress(StubsBlockMem), + PtrBlockAddress, ISAS.NumStubs); + + if (auto EC = sys::Memory::protectMappedMemory( + StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) + return errorCodeToError(EC); + + return LocalIndirectStubsInfo(ISAS.NumStubs, std::move(StubsAndPtrsMem)); + } + + unsigned getNumStubs() const { return NumStubs; } + + void *getStub(unsigned Idx) const { + return static_cast(StubsMem.base()) + Idx * ORCABI::StubSize; + } + + void **getPtr(unsigned Idx) const { + char *PtrsBase = + static_cast(StubsMem.base()) + NumStubs * ORCABI::StubSize; + return reinterpret_cast(PtrsBase) + Idx; + } + +private: + unsigned NumStubs = 0; + sys::OwningMemoryBlock StubsMem; +}; + /// IndirectStubsManager implementation for the host architecture, e.g. /// OrcX86_64. (See OrcArchitectureSupport.h). 
template @@ -379,13 +437,13 @@ class LocalIndirectStubsManager : public IndirectStubsManager { unsigned NewStubsRequired = NumStubs - FreeStubs.size(); unsigned NewBlockId = IndirectStubsInfos.size(); - typename TargetT::IndirectStubsInfo ISI; - if (auto Err = - TargetT::emitIndirectStubsBlock(ISI, NewStubsRequired, nullptr)) - return Err; - for (unsigned I = 0; I < ISI.getNumStubs(); ++I) + auto ISI = + LocalIndirectStubsInfo::create(NewStubsRequired, PageSize); + if (!ISI) + return ISI.takeError(); + for (unsigned I = 0; I < ISI->getNumStubs(); ++I) FreeStubs.push_back(std::make_pair(NewBlockId, I)); - IndirectStubsInfos.push_back(std::move(ISI)); + IndirectStubsInfos.push_back(std::move(*ISI)); return Error::success(); } @@ -394,12 +452,13 @@ class LocalIndirectStubsManager : public IndirectStubsManager { auto Key = FreeStubs.back(); FreeStubs.pop_back(); *IndirectStubsInfos[Key.first].getPtr(Key.second) = - reinterpret_cast(static_cast(InitAddr)); + jitTargetAddressToPointer(InitAddr); StubIndexes[StubName] = std::make_pair(Key, StubFlags); } + unsigned PageSize = sys::Process::getPageSizeEstimate(); std::mutex StubsMutex; - std::vector IndirectStubsInfos; + std::vector> IndirectStubsInfos; using StubKey = std::pair; std::vector FreeStubs; StringMap> StubIndexes; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 01a2b9712e9a4..9206e40fffb1c 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -40,20 +40,25 @@ class LazyCallThroughManager { using NotifyResolvedFunction = unique_function; + LazyCallThroughManager(ExecutionSession &ES, + JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP); + // Return a free call-through trampoline and bind it to look up and call // through to the given symbol. 
Expected getCallThroughTrampoline(JITDylib &SourceJD, SymbolStringPtr SymbolName, NotifyResolvedFunction NotifyResolved); + void resolveTrampolineLandingAddress( + JITTargetAddress TrampolineAddr, + TrampolinePool::NotifyLandingResolvedFunction NotifyLandingResolved); + + virtual ~LazyCallThroughManager() = default; + protected: using NotifyLandingResolvedFunction = TrampolinePool::NotifyLandingResolvedFunction; - LazyCallThroughManager(ExecutionSession &ES, - JITTargetAddress ErrorHandlerAddr, - std::unique_ptr TP); - struct ReexportsEntry { JITDylib *SourceJD; SymbolStringPtr SymbolName; @@ -63,13 +68,7 @@ class LazyCallThroughManager { Expected findReexport(JITTargetAddress TrampolineAddr); Error notifyResolved(JITTargetAddress TrampolineAddr, JITTargetAddress ResolvedAddr); - void resolveTrampolineLandingAddress( - JITTargetAddress TrampolineAddr, - NotifyLandingResolvedFunction NotifyLandingResolved); - - void setTrampolinePool(std::unique_ptr TP) { - this->TP = std::move(TP); - } + void setTrampolinePool(TrampolinePool &TP) { this->TP = &TP; } private: using ReexportsMap = std::map; @@ -79,7 +78,7 @@ class LazyCallThroughManager { std::mutex LCTMMutex; ExecutionSession &ES; JITTargetAddress ErrorHandlerAddr; - std::unique_ptr TP; + TrampolinePool *TP = nullptr; ReexportsMap Reexports; NotifiersMap Notifiers; }; @@ -105,10 +104,13 @@ class LocalLazyCallThroughManager : public LazyCallThroughManager { if (!TP) return TP.takeError(); - setTrampolinePool(std::move(*TP)); + this->TP = std::move(*TP); + setTrampolinePool(*this->TP); return Error::success(); } + std::unique_ptr TP; + public: /// Create a LocalLazyCallThroughManager using the given ABI. See /// createLocalLazyCallThroughManager. 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index 2e58ddd75d318..5061c15cf4c96 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -20,13 +20,33 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" +#include "llvm/Support/MathExtras.h" #include #include namespace llvm { namespace orc { +struct IndirectStubsAllocationSizes { + uint64_t StubBytes = 0; + uint64_t PointerBytes = 0; + unsigned NumStubs = 0; +}; + +template +IndirectStubsAllocationSizes +getIndirectStubsBlockSizes(unsigned MinStubs, unsigned RoundToMultipleOf = 0) { + assert( + (RoundToMultipleOf == 0 || (RoundToMultipleOf % ORCABI::StubSize == 0)) && + "RoundToMultipleOf is not a multiple of stub size"); + uint64_t StubBytes = MinStubs * ORCABI::StubSize; + if (RoundToMultipleOf) + StubBytes = alignTo(StubBytes, RoundToMultipleOf); + unsigned NumStubs = StubBytes / ORCABI::StubSize; + uint64_t PointerBytes = NumStubs * ORCABI::PointerSize; + return {StubBytes, PointerBytes, NumStubs}; +} + /// Generic ORC ABI support. /// /// This class can be substituted as the target architecture support class for @@ -35,113 +55,72 @@ namespace orc { /// will result in execution of an llvm_unreachable. 
class OrcGenericABI { public: - static const unsigned PointerSize = sizeof(uintptr_t); - static const unsigned TrampolineSize = 1; - static const unsigned ResolverCodeSize = 1; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); - - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr) { + static constexpr unsigned PointerSize = sizeof(uintptr_t); + static constexpr unsigned TrampolineSize = 1; + static constexpr unsigned StubSize = 1; + static constexpr unsigned StubToPointerMaxDisplacement = 1; + static constexpr unsigned ResolverCodeSize = 1; + + static void writeResolverCode(char *ResolveWorkingMem, + JITTargetAddress ResolverTargetAddr, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { llvm_unreachable("writeResolverCode is not supported by the generic host " "support class"); } - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddr, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { llvm_unreachable("writeTrampolines is not supported by the generic host " "support class"); } - class IndirectStubsInfo { - public: - const static unsigned StubSize = 1; - - unsigned getNumStubs() const { llvm_unreachable("Not supported"); } - void *getStub(unsigned Idx) const { llvm_unreachable("Not supported"); } - void **getPtr(unsigned Idx) const { llvm_unreachable("Not supported"); } - }; - - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal) { - llvm_unreachable("emitIndirectStubsBlock is not supported by the generic " - "host support class"); - } -}; - -/// Provide information about stub blocks generated by the -/// makeIndirectStubsBlock function. 
-template class GenericIndirectStubsInfo { -public: - const static unsigned StubSize = StubSizeVal; - - GenericIndirectStubsInfo() = default; - GenericIndirectStubsInfo(unsigned NumStubs, sys::OwningMemoryBlock StubsMem) - : NumStubs(NumStubs), StubsMem(std::move(StubsMem)) {} - GenericIndirectStubsInfo(GenericIndirectStubsInfo &&Other) - : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { - Other.NumStubs = 0; - } - - GenericIndirectStubsInfo &operator=(GenericIndirectStubsInfo &&Other) { - NumStubs = Other.NumStubs; - Other.NumStubs = 0; - StubsMem = std::move(Other.StubsMem); - return *this; - } - - /// Number of stubs in this block. - unsigned getNumStubs() const { return NumStubs; } - - /// Get a pointer to the stub at the given index, which must be in - /// the range 0 .. getNumStubs() - 1. - void *getStub(unsigned Idx) const { - return static_cast(StubsMem.base()) + Idx * StubSize; + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + llvm_unreachable( + "writeIndirectStubsBlock is not supported by the generic host " + "support class"); } - - /// Get a pointer to the implementation-pointer at the given index, - /// which must be in the range 0 .. getNumStubs() - 1. 
- void **getPtr(unsigned Idx) const { - char *PtrsBase = static_cast(StubsMem.base()) + NumStubs * StubSize; - return reinterpret_cast(PtrsBase) + Idx; - } - -private: - unsigned NumStubs = 0; - sys::OwningMemoryBlock StubsMem; }; class OrcAArch64 { public: - static const unsigned PointerSize = 8; - static const unsigned TrampolineSize = 12; - static const unsigned ResolverCodeSize = 0x120; - - using IndirectStubsInfo = GenericIndirectStubsInfo<8>; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 12; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1U << 27; + static constexpr unsigned ResolverCodeSize = 0x120; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress RentryCtxAddr); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. 
Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned MinStubs); }; /// X86_64 code that's common to all ABIs. @@ -149,25 +128,26 @@ class OrcAArch64 { /// X86_64 supports lazy JITing. class OrcX86_64_Base { public: - static const unsigned PointerSize = 8; - static const unsigned TrampolineSize = 8; - - using IndirectStubsInfo = GenericIndirectStubsInfo<8>; + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 8; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). 
- static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; /// X86_64 support for SysV ABI (Linux, MacOSX). @@ -175,15 +155,19 @@ class OrcX86_64_Base { /// X86_64_SysV supports lazy JITing. class OrcX86_64_SysV : public OrcX86_64_Base { public: - static const unsigned ResolverCodeSize = 0x6C; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned ResolverCodeSize = 0x6C; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); }; /// X86_64 support for Win32. @@ -191,15 +175,19 @@ class OrcX86_64_SysV : public OrcX86_64_Base { /// X86_64_Win32 supports lazy JITing. 
class OrcX86_64_Win32 : public OrcX86_64_Base { public: - static const unsigned ResolverCodeSize = 0x74; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned ResolverCodeSize = 0x74; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); }; /// I386 support. @@ -207,34 +195,39 @@ class OrcX86_64_Win32 : public OrcX86_64_Base { /// I386 supports lazy JITing. class OrcI386 { public: - static const unsigned PointerSize = 4; - static const unsigned TrampolineSize = 8; - static const unsigned ResolverCodeSize = 0x4a; - - using IndirectStubsInfo = GenericIndirectStubsInfo<8>; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned PointerSize = 4; + static constexpr unsigned TrampolineSize = 8; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x4a; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). 
The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. Asking for 4 stubs on i386, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; // @brief Mips32 support. @@ -242,41 +235,61 @@ class OrcI386 { // Mips32 supports lazy JITing. 
class OrcMips32_Base { public: - static const unsigned PointerSize = 4; - static const unsigned TrampolineSize = 20; - static const unsigned ResolverCodeSize = 0xfc; - using IndirectStubsInfo = GenericIndirectStubsInfo<16>; + static constexpr unsigned PointerSize = 4; + static constexpr unsigned TrampolineSize = 20; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0xfc; - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,unsigned NumTrampolines); + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines); /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr, bool isBigEndian); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. /// - /// E.g. Asking for 4 stubs on Mips32, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,unsigned MinStubs, void *InitialPtrVal); + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. 
+ static void writeResolverCode(char *ResolverBlockWorkingMem, + JITTargetAddress ResolverBlockTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr, + bool isBigEndian); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; - class OrcMips32Le : public OrcMips32_Base { public: - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr) - { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, false); } + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + OrcMips32_Base::writeResolverCode(ResolverWorkingMem, ResolverTargetAddress, + ReentryFnAddr, ReentryCtxAddr, false); + } }; class OrcMips32Be : public OrcMips32_Base { public: - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr) - { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, true); } + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + OrcMips32_Base::writeResolverCode(ResolverWorkingMem, ResolverTargetAddress, + ReentryFnAddr, ReentryCtxAddr, true); + } }; // @brief Mips64 support. @@ -284,31 +297,41 @@ class OrcMips32Be : public OrcMips32_Base { // Mips64 supports lazy JITing. 
class OrcMips64 { public: - static const unsigned PointerSize = 8; - static const unsigned TrampolineSize = 40; - static const unsigned ResolverCodeSize = 0x120; + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 40; + static constexpr unsigned StubSize = 32; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x120; - using IndirectStubsInfo = GenericIndirectStubsInfo<32>; - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,unsigned NumTrampolines); - - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. Asking for 4 stubs on Mips64, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). 
- static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,unsigned MinStubs, void *InitialPtrVal); + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, + unsigned NumTrampolines); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; - } // end namespace orc - } // end namespace llvm +} // end namespace orc +} // end namespace llvm + #endif // LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h index ac1df847cf7e2..50c155d77db17 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h @@ -15,6 +15,7 @@ #define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" #include "llvm/Support/Debug.h" @@ -262,19 +263,17 @@ class OrcRemoteTargetServer return errorCodeToError( orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist)); - typename TargetT::IndirectStubsInfo IS; - if (auto Err = - TargetT::emitIndirectStubsBlock(IS, NumStubsRequired, nullptr)) - return std::move(Err); + auto IS = LocalIndirectStubsInfo::create( + NumStubsRequired, sys::Process::getPageSizeEstimate()); + if (!IS) + return IS.takeError(); - JITTargetAddress StubsBase = static_cast( - 
reinterpret_cast(IS.getStub(0))); - JITTargetAddress PtrsBase = static_cast( - reinterpret_cast(IS.getPtr(0))); - uint32_t NumStubsEmitted = IS.getNumStubs(); + JITTargetAddress StubsBase = pointerToJITTargetAddress(IS->getStub(0)); + JITTargetAddress PtrsBase = pointerToJITTargetAddress(IS->getPtr(0)); + uint32_t NumStubsEmitted = IS->getNumStubs(); auto &BlockList = StubOwnerItr->second; - BlockList.push_back(std::move(IS)); + BlockList.push_back(std::move(*IS)); return std::make_tuple(StubsBase, PtrsBase, NumStubsEmitted); } @@ -287,8 +286,10 @@ class OrcRemoteTargetServer if (EC) return errorCodeToError(EC); - TargetT::writeResolverCode(static_cast(ResolverBlock.base()), - &reenter, this); + TargetT::writeResolverCode(static_cast(ResolverBlock.base()), + pointerToJITTargetAddress(ResolverBlock.base()), + pointerToJITTargetAddress(&reenter), + pointerToJITTargetAddress(this)); return errorCodeToError(sys::Memory::protectMappedMemory( ResolverBlock.getMemoryBlock(), @@ -308,9 +309,10 @@ class OrcRemoteTargetServer (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) / TargetT::TrampolineSize; - uint8_t *TrampolineMem = static_cast(TrampolineBlock.base()); - TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), - NumTrampolines); + char *TrampolineMem = static_cast(TrampolineBlock.base()); + TargetT::writeTrampolines( + TrampolineMem, pointerToJITTargetAddress(TrampolineMem), + pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), sys::Memory::MF_READ | @@ -318,10 +320,8 @@ class OrcRemoteTargetServer TrampolineBlocks.push_back(std::move(TrampolineBlock)); - auto TrampolineBaseAddr = static_cast( - reinterpret_cast(TrampolineMem)); - - return std::make_tuple(TrampolineBaseAddr, NumTrampolines); + return std::make_tuple(pointerToJITTargetAddress(TrampolineMem), + NumTrampolines); } Expected handleGetSymbolAddress(const std::string &Name) { @@ -337,7 +337,7 @@ class 
OrcRemoteTargetServer uint32_t PointerSize = TargetT::PointerSize; uint32_t PageSize = sys::Process::getPageSizeEstimate(); uint32_t TrampolineSize = TargetT::TrampolineSize; - uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize; + uint32_t IndirectStubSize = TargetT::StubSize; LLVM_DEBUG(dbgs() << " Remote info:\n" << " triple = '" << ProcessTriple << "'\n" << " pointer size = " << PointerSize << "\n" @@ -433,7 +433,7 @@ class OrcRemoteTargetServer SymbolLookupFtor SymbolLookup; EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister; std::map Allocators; - using ISBlockOwnerList = std::vector; + using ISBlockOwnerList = std::vector>; std::map IndirectStubsOwners; sys::OwningMemoryBlock ResolverBlock; std::vector TrampolineBlocks; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index d8213d3b35e8f..a6537dd3093bd 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -185,7 +185,7 @@ class IRSpeculationLayer : public IRLayer { : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} - void emit(MaterializationResponsibility R, ThreadSafeModule TSM); + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; private: TargetAndLikelies diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h new file mode 100644 index 0000000000000..db9cd1b98cf9c --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h @@ -0,0 +1,209 @@ +//===--- TPCIndirectionUtils.h - TPC based indirection utils ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Indirection utilities (stubs, trampolines, lazy call-throughs) that use the +// TargetProcessControl API to interact with the target process. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H + +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/LazyReexports.h" + +#include + +namespace llvm { +namespace orc { + +class TargetProcessControl; + +/// Provides TargetProcessControl based indirect stubs, trampoline pool and +/// lazy call through manager. +class TPCIndirectionUtils { + friend class TPCIndirectionUtilsAccess; + +public: + /// ABI support base class. Used to write resolver, stub, and trampoline + /// blocks. 
+ class ABISupport { + protected: + ABISupport(unsigned PointerSize, unsigned TrampolineSize, unsigned StubSize, + unsigned StubToPointerMaxDisplacement, unsigned ResolverCodeSize) + : PointerSize(PointerSize), TrampolineSize(TrampolineSize), + StubSize(StubSize), + StubToPointerMaxDisplacement(StubToPointerMaxDisplacement), + ResolverCodeSize(ResolverCodeSize) {} + + public: + virtual ~ABISupport(); + + unsigned getPointerSize() const { return PointerSize; } + unsigned getTrampolineSize() const { return TrampolineSize; } + unsigned getStubSize() const { return StubSize; } + unsigned getStubToPointerMaxDisplacement() const { + return StubToPointerMaxDisplacement; + } + unsigned getResolverCodeSize() const { return ResolverCodeSize; } + + virtual void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddr, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) const = 0; + + virtual void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTragetAddr, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines) const = 0; + + virtual void + writeIndirectStubsBlock(char *StubsBlockWorkingMem, + JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, + unsigned NumStubs) const = 0; + + private: + unsigned PointerSize = 0; + unsigned TrampolineSize = 0; + unsigned StubSize = 0; + unsigned StubToPointerMaxDisplacement = 0; + unsigned ResolverCodeSize = 0; + }; + + /// Create using the given ABI class. + template + static std::unique_ptr + CreateWithABI(TargetProcessControl &TPC); + + /// Create based on the TargetProcessControl triple. + static Expected> + Create(TargetProcessControl &TPC); + + /// Return a reference to the TargetProcessControl object. + TargetProcessControl &getTargetProcessControl() const { return TPC; } + + /// Return a reference to the ABISupport object for this instance. 
+ ABISupport &getABISupport() const { return *ABI; } + + /// Release memory for resources held by this instance. This *must* be called + /// prior to destruction of the class. + Error cleanup(); + + /// Write resolver code to the target process and return its address. + /// This must be called before any call to createTrampolinePool or + /// createLazyCallThroughManager. + Expected + writeResolverBlock(JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); + + /// Returns the address of the Resolver block. Returns zero if the + /// writeResolverBlock method has not previously been called. + JITTargetAddress getResolverBlockAddress() const { return ResolverBlockAddr; } + + /// Create an IndirectStubsManager for the target process. + std::unique_ptr createIndirectStubsManager(); + + /// Create a TrampolinePool for the target process. + TrampolinePool &getTrampolinePool(); + + /// Create a LazyCallThroughManager. + /// This function should only be called once. + LazyCallThroughManager & + createLazyCallThroughManager(ExecutionSession &ES, + JITTargetAddress ErrorHandlerAddr); + + /// Create a LazyCallThroughManager for the target process. + LazyCallThroughManager &getLazyCallThroughManager() { + assert(LCTM && "createLazyCallThroughManager must be called first"); + return *LCTM; + } + +private: + using Allocation = jitlink::JITLinkMemoryManager::Allocation; + + struct IndirectStubInfo { + IndirectStubInfo() = default; + IndirectStubInfo(JITTargetAddress StubAddress, + JITTargetAddress PointerAddress) + : StubAddress(StubAddress), PointerAddress(PointerAddress) {} + JITTargetAddress StubAddress = 0; + JITTargetAddress PointerAddress = 0; + }; + + using IndirectStubInfoVector = std::vector; + + /// Create a TPCIndirectionUtils instance. 
+ TPCIndirectionUtils(TargetProcessControl &TPC, + std::unique_ptr ABI); + + Expected getIndirectStubs(unsigned NumStubs); + + std::mutex TPCUIMutex; + TargetProcessControl &TPC; + std::unique_ptr ABI; + JITTargetAddress ResolverBlockAddr; + std::unique_ptr ResolverBlock; + std::unique_ptr TP; + std::unique_ptr LCTM; + + std::vector AvailableIndirectStubs; + std::vector> IndirectStubAllocs; +}; + +namespace detail { + +template +class ABISupportImpl : public TPCIndirectionUtils::ABISupport { +public: + ABISupportImpl() + : ABISupport(ORCABI::PointerSize, ORCABI::TrampolineSize, + ORCABI::StubSize, ORCABI::StubToPointerMaxDisplacement, + ORCABI::ResolverCodeSize) {} + + void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddr, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) const override { + ORCABI::writeResolverCode(ResolverWorkingMem, ResolverTargetAddr, + ReentryFnAddr, ReentryCtxAddr); + } + + void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddr, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines) const override { + ORCABI::writeTrampolines(TrampolineBlockWorkingMem, + TrampolineBlockTargetAddr, ResolverAddr, + NumTrampolines); + } + + void writeIndirectStubsBlock(char *StubsBlockWorkingMem, + JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, + unsigned NumStubs) const override { + ORCABI::writeIndirectStubsBlock(StubsBlockWorkingMem, + StubsBlockTargetAddress, + PointersBlockTargetAddress, NumStubs); + } +}; + +} // end namespace detail + +template +std::unique_ptr +TPCIndirectionUtils::CreateWithABI(TargetProcessControl &TPC) { + return std::unique_ptr(new TPCIndirectionUtils( + TPC, std::make_unique>())); +} + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_T_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h 
b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h new file mode 100644 index 0000000000000..facafd8836530 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h @@ -0,0 +1,162 @@ +//===--- TargetProcessControl.h - Target process control APIs ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for interacting with target processes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H + +#include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" + +#include + +namespace llvm { +namespace orc { + +/// TargetProcessControl supports interaction with a JIT target process. +class TargetProcessControl { +public: + /// APIs for manipulating memory in the target process. 
+ class MemoryAccess { + public: + template struct UIntWrite { + UIntWrite() = default; + UIntWrite(JITTargetAddress Address, T Value) + : Address(Address), Value(Value) {} + + JITTargetAddress Address = 0; + T Value = 0; + }; + + using UInt8Write = UIntWrite; + using UInt16Write = UIntWrite; + using UInt32Write = UIntWrite; + using UInt64Write = UIntWrite; + + struct BufferWrite { + BufferWrite(JITTargetAddress Address, StringRef Buffer) + : Address(Address), Buffer(Buffer) {} + + JITTargetAddress Address = 0; + StringRef Buffer; + }; + + using WriteResultFn = unique_function; + + virtual ~MemoryAccess(); + + virtual void writeUInt8s(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeUInt16s(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeUInt32s(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeUInt64s(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeBuffers(ArrayRef Ws, + WriteResultFn OnWriteComplete) = 0; + + Error writeUInt8s(ArrayRef Ws) { + std::promise ResultP; + auto ResultF = ResultP.get_future(); + writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeUInt16s(ArrayRef Ws) { + std::promise ResultP; + auto ResultF = ResultP.get_future(); + writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeUInt32s(ArrayRef Ws) { + std::promise ResultP; + auto ResultF = ResultP.get_future(); + writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeUInt64s(ArrayRef Ws) { + std::promise ResultP; + auto ResultF = ResultP.get_future(); + writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeBuffers(ArrayRef Ws) { + std::promise ResultP; + auto ResultF = ResultP.get_future(); + writeBuffers(Ws, [&](Error Err) { 
ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + }; + + virtual ~TargetProcessControl(); + + /// Return the Triple for the target process. + const Triple &getTargetTriple() const { return TT; } + + /// Get the page size for the target process. + unsigned getPageSize() const { return PageSize; } + + /// Return a JITLinkMemoryManager for the target process. + jitlink::JITLinkMemoryManager &getMemMgr() const { return *MemMgr; } + + /// Return a MemoryAccess object for the target process. + MemoryAccess &getMemoryAccess() const { return *MemAccess; } + +protected: + TargetProcessControl(Triple TT, unsigned PageSize); + + Triple TT; + unsigned PageSize = 0; + jitlink::JITLinkMemoryManager *MemMgr = nullptr; + MemoryAccess *MemAccess = nullptr; +}; + +/// A TargetProcessControl +class SelfTargetProcessControl : public TargetProcessControl, + private TargetProcessControl::MemoryAccess { +public: + SelfTargetProcessControl(Triple TT, unsigned PageSize); + + static Expected> Create(); + +private: + void writeUInt8s(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; + + void writeUInt16s(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; + + void writeUInt32s(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; + + void writeUInt64s(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; + + void writeBuffers(ArrayRef Ws, + WriteResultFn OnWriteComplete) override; + + std::unique_ptr IPMM = + std::make_unique(); +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H diff --git a/llvm/include/llvm/Frontend/CMakeLists.txt b/llvm/include/llvm/Frontend/CMakeLists.txt new file mode 100644 index 0000000000000..ea66917b8936a --- /dev/null +++ b/llvm/include/llvm/Frontend/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(OpenACC) +add_subdirectory(OpenMP) diff --git a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td index 
785a520613b96..26049ca60db39 100644 --- a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td +++ b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td @@ -43,6 +43,9 @@ class DirectiveLanguage { // Header file included in the implementation code generated. Ususally the // output file of the declaration code generation. Can be left blank. string includeHeader = ""; + + // EnumSet class name used for clauses to generated the allowed clauses map. + string clauseEnumSetClass = ""; } // Information about a specific clause. @@ -56,6 +59,9 @@ class Clause { // Optional class holding value of the clause in clang AST. string clangClass = ?; + // Optional class holding value of the clause in flang AST. + string flangClass = ?; + // Is clause implicit? If clause is set as implicit, the default kind will // be return in getClauseKind instead of their own kind. bit isImplicit = 0; @@ -92,6 +98,9 @@ class Directive { // List of clauses that are allowed to appear only once. list allowedOnceClauses = []; + // List of clauses that are allowed but mutually exclusive. + list allowedExclusiveClauses = []; + // List of clauses that are required. list requiredClauses = []; diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td new file mode 100644 index 0000000000000..e96b7e8466628 --- /dev/null +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -0,0 +1,604 @@ +//===-- ACC.td - OpenACC directive definition file ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the definition file for OpenACC directives and clauses. 
+// +//===----------------------------------------------------------------------===// + +include "llvm/Frontend/Directive/DirectiveBase.td" + +//===----------------------------------------------------------------------===// +// Definition of general OpenACC information +//===----------------------------------------------------------------------===// + +def OpenACC : DirectiveLanguage { + let name = "OpenACC"; + let cppNamespace = "acc"; // final namespace will be llvm::acc + let directivePrefix = "ACCD_"; + let clausePrefix = "ACCC_"; + let makeEnumAvailableInNamespace = 1; + let enableBitmaskEnumInNamespace = 1; + let includeHeader = "llvm/Frontend/OpenACC/ACC.h.inc"; + let clauseEnumSetClass = "AccClauseSet"; +} + +//===----------------------------------------------------------------------===// +// Definition of OpenACC clauses +//===----------------------------------------------------------------------===// + +// 2.9.6 +def ACCC_Auto : Clause<"auto"> {} + +// 2.16.1 +def ACCC_Async : Clause<"async"> { + let flangClass = "std::optional"; +} + +// 2.7.11 +def ACCC_Attach : Clause<"attach"> { + let flangClass = "AccObjectList"; +} + +// 2.15.1 +def ACCC_Bind : Clause<"bind"> { + let flangClass = "Name"; +} + +// 2.12 +def ACCC_Capture : Clause<"capture"> { +} + +// 2.9.1 +def ACCC_Collapse : Clause<"collapse"> { + let flangClass = "ScalarIntConstantExpr"; +} + +// 2.7.5 +def ACCC_Copy : Clause<"copy"> { + let flangClass = "AccObjectList"; +} +// 2.7.6 +def ACCC_Copyin : Clause<"copyin"> { + let flangClass = "AccObjectListWithModifier"; +} + +// 2.7.7 +def ACCC_Copyout : Clause<"copyout"> { + let flangClass = "AccObjectListWithModifier"; +} + +// 2.7.8 +def ACCC_Create : Clause<"create"> { + let flangClass = "AccObjectListWithModifier"; +} + +// 2.5.14 +def ACCC_Default : Clause<"default"> { + let flangClass = "AccDefaultClause"; +} + +// 2.4.12 +def ACCC_DefaultAsync : Clause<"default_async"> { + let flangClass = "ScalarIntExpr"; +} + +// 2.7.10 +def ACCC_Delete : 
Clause<"delete"> { + let flangClass = "AccObjectList"; +} + +// 2.7.12 +def ACCC_Detach : Clause<"detach"> { + let flangClass = "AccObjectList"; +} + +// 2.14.4 +def ACCC_Device : Clause<"device"> { + let flangClass = "AccObjectList"; +} + +// 2.14.1 +def ACCC_DeviceNum : Clause<"device_num"> { + let flangClass = "ScalarIntConstantExpr"; +} + +// 2.7.3 +def ACCC_DevicePtr : Clause<"deviceptr"> { + let flangClass = "AccObjectList"; +} + +// 2.13 +def ACCC_DeviceResident : Clause<"device_resident"> { + let flangClass = "AccObjectList"; +} + +// 2.4 +def ACCC_DeviceType : Clause<"device_type"> { + // (DeviceType, "*" + let flangClass = "std::optional>"; +} + +// 2.6.6 +def ACCC_Finalize : Clause<"finalize"> {} + +// 2.5.12 +def ACCC_FirstPrivate : Clause<"firstprivate"> { + let flangClass = "AccObjectList"; +} + +// 2.9.2 +def ACCC_Gang : Clause<"gang"> { + let flangClass = "std::optional"; +} + +// 2.14.4 +def ACCC_Host : Clause<"host"> { + let flangClass = "AccObjectList"; +} + +// 2.5.4 +def ACCC_If : Clause <"if"> { + let flangClass = "ScalarLogicalExpr"; +} + +// 2.14.4 +def ACCC_IfPresent : Clause<"if_present"> {} + +// 2.9.9 +def ACCC_Independent : Clause<"independent"> {} + +// 2.13 +def ACCC_Link : Clause<"link"> { + let flangClass = "AccObjectList"; +} + +// 2.7.9 +def ACCC_NoCreate : Clause<"no_create"> { + let flangClass = "AccObjectList"; +} + +// 2.15.1 +def ACCC_NoHost : Clause<"nohost"> {} + +// 2.5.8 +def ACCC_NumGangs : Clause<"num_gangs"> { + let flangClass = "ScalarIntExpr"; +} + +// 2.5.9 +def ACCC_NumWorkers : Clause<"num_workers"> { + let flangClass = "ScalarIntExpr"; +} + +// 2.7.4 +def ACCC_Present : Clause<"present"> { + let flangClass = "AccObjectList"; +} + +// 2.5.11 +def ACCC_Private : Clause<"private"> { + let flangClass = "AccObjectList"; +} + +// 2.9.7 +def ACCC_Tile : Clause <"tile"> { + let flangClass = "AccSizeExprList"; +} + +// 2.8.1 +def ACCC_UseDevice : Clause <"use_device"> { + let flangClass = "AccObjectList"; +} + +// 2.12 
+def ACCC_Read : Clause<"read"> {} + +// 2.5.13 +def ACCC_Reduction : Clause<"reduction"> { + let flangClass = "AccObjectListWithReduction"; +} + +// 2.5.5 +def ACCC_Self : Clause<"self"> { + let flangClass = "std::optional"; +} + +// 2.9.5 +def ACCC_Seq : Clause<"seq"> {} + +// 2.9.4 +def ACCC_Vector : Clause<"vector"> { + let flangClass = "std::optional"; +} + +// 2.5.10 +def ACCC_VectorLength : Clause<"vector_length"> { + let flangClass = "ScalarIntExpr"; +} + +// 2.16.2 +def ACCC_Wait : Clause<"wait"> { + let flangClass = "std::optional"; +} + +// 2.9.3 +def ACCC_Worker: Clause<"worker"> { + let flangClass = "std::optional"; +} + +// 2.12 +def ACCC_Write : Clause<"write"> {} + +def ACCC_Unknown : Clause<"unknown"> { + let isDefault = 1; +} + +//===----------------------------------------------------------------------===// +// Definition of OpenACC directives +//===----------------------------------------------------------------------===// + +// 2.12 +def ACC_Atomic : Directive<"atomic"> {} + +// 2.6.5 +def ACC_Data : Directive<"data"> { + let allowedOnceClauses = [ + VersionedClause + ]; + let requiredClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.13 +def ACC_Declare : Directive<"declare"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.5.2 +def ACC_Kernels : Directive<"kernels"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 
2.5.1 +def ACC_Parallel : Directive<"parallel"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.5.3 +def ACC_Serial : Directive<"serial"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.9 +def ACC_Loop : Directive<"loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.10 +def ACC_Cache : Directive<"cache"> {} + +// 2.14.1 +def ACC_Init : Directive<"init"> { + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.15.1 +def ACC_Routine : Directive<"routine"> { + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.14.3 +def ACC_Set : Directive<"set"> { + let allowedOnceClauses = [ + VersionedClause + ]; + let requiredClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.14.2 +def ACC_Shutdown : Directive<"shutdown"> { + let 
allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.14.4 +def ACC_Update : Directive<"update"> { + let allowedClauses = [ + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.16.3 +def ACC_Wait : Directive<"wait"> { + let allowedOnceClauses = [ + VersionedClause, + VersionedClause + ]; +} + +// 2.14.6 +def ACC_EnterData : Directive<"enter data"> { + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.14.7 +def ACC_ExitData : Directive<"exit data"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} +def ACC_HostData : Directive<"host_data"> { + let allowedClauses = [ + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause + ]; +} + +// 2.11 +def ACC_KernelsLoop : Directive<"kernels loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.11 +def ACC_ParallelLoop : Directive<"parallel loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + 
VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +// 2.11 +def ACC_SerialLoop : Directive<"serial loop"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} + +def ACC_Unknown : Directive<"unknown"> { + let isDefault = 1; +} \ No newline at end of file diff --git a/llvm/include/llvm/Frontend/OpenACC/CMakeLists.txt b/llvm/include/llvm/Frontend/OpenACC/CMakeLists.txt new file mode 100644 index 0000000000000..31086ec9a47bc --- /dev/null +++ b/llvm/include/llvm/Frontend/OpenACC/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS ACC.td) +tablegen(LLVM ACC.h.inc --gen-directive-decl) +tablegen(LLVM ACC.cpp.inc --gen-directive-gen) +add_public_tablegen_target(acc_gen) diff --git a/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt b/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt index 69f503675940d..3ff89888bfd64 100644 --- a/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt +++ b/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt @@ 
-1,3 +1,4 @@ set(LLVM_TARGET_DEFINITIONS OMP.td) tablegen(LLVM OMP.h.inc --gen-directive-decl) +tablegen(LLVM OMP.cpp.inc --gen-directive-gen) add_public_tablegen_target(omp_gen) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index bd81eeb011272..2df1d2c2bec26 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -24,6 +24,7 @@ def OpenMP : DirectiveLanguage { let makeEnumAvailableInNamespace = 1; let enableBitmaskEnumInNamespace = 1; let includeHeader = "llvm/Frontend/OpenMP/OMP.h.inc"; + let clauseEnumSetClass = "OmpClauseSet"; } //===----------------------------------------------------------------------===// @@ -201,10 +202,7 @@ def OMPC_Notinbranch : Clause<"notinbranch"> {} def OMP_ThreadPrivate : Directive<"threadprivate"> {} def OMP_Parallel : Directive<"parallel"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -212,11 +210,14 @@ def OMP_Parallel : Directive<"parallel"> { VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + ]; } def OMP_Task : Directive<"task"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -224,12 +225,16 @@ def OMP_Task : Directive<"task"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_Simd : Directive<"simd"> { let allowedClauses = [ @@ -237,15 +242,17 @@ def OMP_Simd : Directive<"simd"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses 
= [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; } def OMP_For : Directive<"for"> { let allowedClauses = [ @@ -273,7 +280,8 @@ def OMP_Do : Directive<"do"> { let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause ]; } def OMP_Sections : Directive<"sections"> { @@ -304,7 +312,11 @@ def OMP_Critical : Directive<"critical"> { } def OMP_TaskYield : Directive<"taskyield"> {} def OMP_Barrier : Directive<"barrier"> {} -def OMP_TaskWait : Directive<"taskwait"> {} +def OMP_TaskWait : Directive<"taskwait"> { + let allowedClauses = [ + VersionedClause + ]; +} def OMP_TaskGroup : Directive<"taskgroup"> { let allowedClauses = [ VersionedClause, @@ -345,30 +357,34 @@ def OMP_Atomic : Directive<"atomic"> { def OMP_Target : Directive<"target"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_Teams : Directive<"teams"> { let allowedClauses = [ - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_Cancel : Directive<"cancel"> { let allowedClauses = [ @@ -386,50 +402,64 @@ def OMP_Requires : Directive<"requires"> { } def OMP_TargetData : Directive<"target data"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause + ]; } def OMP_TargetEnterData : Directive<"target enter data"> { let allowedClauses = [ + VersionedClause, + VersionedClause + ]; + 
let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_TargetExitData : Directive<"target exit data"> { let allowedClauses = [ - VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause ]; } def OMP_TargetParallel : Directive<"target parallel"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_TargetParallelFor : Directive<"target parallel for"> { let allowedClauses = [ @@ -459,27 +489,31 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> { } def OMP_TargetParallelDo : Directive<"target parallel do"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause ]; } def OMP_TargetUpdate : Directive<"target update"> { @@ -558,27 +592,29 @@ def OMP_ParallelForSimd : Directive<"parallel for simd"> { } 
def OMP_ParallelDoSimd : Directive<"parallel do simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_ParallelMaster : Directive<"parallel master"> { let allowedClauses = [ @@ -597,7 +633,6 @@ def OMP_ParallelMaster : Directive<"parallel master"> { def OMP_ParallelSections : Directive<"parallel sections"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -608,6 +643,9 @@ def OMP_ParallelSections : Directive<"parallel sections"> { VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause + ]; } def OMP_ForSimd : Directive<"for simd"> { let allowedClauses = [ @@ -643,7 +681,8 @@ def OMP_DoSimd : Directive<"do simd"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause ]; } def OMP_CancellationPoint : Directive<"cancellation point"> {} @@ -653,53 +692,74 @@ def OMP_DeclareMapper : Directive<"declare mapper"> { VersionedClause ]; } -def OMP_DeclareSimd : Directive<"declare simd"> {} +def OMP_DeclareSimd : Directive<"declare simd"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause + ]; +} def OMP_TaskLoop : Directive<"taskloop"> { let allowedClauses = [ - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, 
VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause + ]; } def OMP_TaskLoopSimd : Directive<"taskloop simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause ]; } def OMP_Distribute : Directive<"distribute"> { @@ -707,10 +767,12 @@ def OMP_Distribute : Directive<"distribute"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause + ]; } def OMP_DeclareTarget : Directive<"declare target"> {} def OMP_EndDeclareTarget : Directive<"end declare target"> {} @@ -735,21 +797,25 @@ def OMP_DistributeParallelFor : Directive<"distribute parallel for"> { } def OMP_DistributeParallelDo : Directive<"distribute parallel do"> { let allowedClauses = [ + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + 
VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> { @@ -802,22 +868,31 @@ def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> { } def OMP_DistributeSimd : Directive<"distribute simd"> { let allowedClauses = [ - VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause ]; } + def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> { let allowedClauses = [ VersionedClause, @@ -880,27 +955,33 @@ def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> { } def OMP_TargetSimd : Directive<"target simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause ]; 
+ let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_TeamsDistribute : Directive<"teams distribute"> { let allowedClauses = [ @@ -919,26 +1000,29 @@ def OMP_TeamsDistribute : Directive<"teams distribute"> { } def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } + def OMP_TeamsDistributeParallelForSimd : Directive<"teams distribute parallel for simd"> { let allowedClauses = [ @@ -968,27 +1052,29 @@ def OMP_TeamsDistributeParallelForSimd : def OMP_TeamsDistributeParallelDoSimd : Directive<"teams distribute parallel do simd"> { let allowedClauses = [ + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause, ]; } def 
OMP_TeamsDistributeParallelFor : @@ -1016,68 +1102,78 @@ def OMP_TeamsDistributeParallelFor : def OMP_TeamsDistributeParallelDo : Directive<"teams distribute parallel do"> { let allowedClauses = [ + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_TargetTeams : Directive<"target teams"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause + ]; + + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause ]; } def OMP_TargetTeamsDistribute : Directive<"target teams distribute"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } + def 
OMP_TargetTeamsDistributeParallelFor : Directive<"target teams distribute parallel for"> { let allowedClauses = [ @@ -1110,28 +1206,33 @@ def OMP_TargetTeamsDistributeParallelDo : Directive<"target teams distribute parallel do"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause ]; } def OMP_TargetTeamsDistributeParallelForSimd : @@ -1170,63 +1271,69 @@ def OMP_TargetTeamsDistributeParallelForSimd : def OMP_TargetTeamsDistributeParallelDoSimd : Directive<"target teams distribute parallel do simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + 
VersionedClause ]; } def OMP_TargetTeamsDistributeSimd : Directive<"target teams distribute simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause ]; } def OMP_Allocate : Directive<"allocate"> { @@ -1359,7 +1466,22 @@ def OMP_Scan : Directive<"scan"> { } def OMP_BeginDeclareVariant : Directive<"begin declare variant"> {} def OMP_EndDeclareVariant : Directive<"end declare variant"> {} -def OMP_ParallelWorkshare : Directive<"parallel workshare"> {} +def OMP_ParallelWorkshare : Directive<"parallel workshare"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_Workshare : Directive<"workshare"> {} def OMP_EndDo : Directive<"end do"> {} def OMP_EndDoSimd : Directive<"end do simd"> {} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 2a3a64a5f4ac2..95eed59f1b3d0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -285,9 +285,14 @@ class OpenMPIRBuilder { /// Helper that contains information about regions we need to outline /// 
during finalization. struct OutlineInfo { - SmallVector Blocks; using PostOutlineCBTy = std::function; PostOutlineCBTy PostOutlineCB; + BasicBlock *EntryBB, *ExitBB; + + /// Collect all blocks in between EntryBB and ExitBB in both the given + /// vector and set. + void collectBlocks(SmallPtrSetImpl &BlockSet, + SmallVectorImpl &BlockVector); }; /// Collection of regions that need to be outlined during finalization. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index f286403e657c9..4f2fcb8af5d1d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -383,7 +383,8 @@ __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, - Int8Ptr, Int32, Int8Ptr) + /* kmp_task_t */ VoidPtr, Int32, + /* kmp_task_affinity_info_t */ VoidPtr) __OMP_RTL(omp_get_thread_num, false, Int32, ) __OMP_RTL(omp_get_num_threads, false, Int32, ) @@ -430,8 +431,7 @@ __OMP_RTL(__kmpc_reduce, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_reduce_nowait, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) -__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, - KmpCriticalNamePtrTy) +__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_end_reduce_nowait, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) @@ -514,10 +514,10 @@ __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr, /* Int */ Int32, Int64, VoidPtr) __OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr, IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64) 
-__OMP_RTL(__kmpc_taskred_modifier_init, false, VoidPtr, IdentPtr, - /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) -__OMP_RTL(__kmpc_taskred_init, false, VoidPtr, /* Int */ Int32, - /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr, + IdentPtr, /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_init, false, /* kmp_taskgroup */ VoidPtr, + /* Int */ Int32, /* Int */ Int32, VoidPtr) __OMP_RTL(__kmpc_task_reduction_modifier_fini, false, Void, IdentPtr, /* Int */ Int32, /* Int */ Int32) __OMP_RTL(__kmpc_task_reduction_get_th_data, false, VoidPtr, Int32, VoidPtr, @@ -557,39 +557,46 @@ __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32, /* omp_allocator_handle_t */ VoidPtr) __OMP_RTL(__kmpc_push_target_tripcount, false, Void, Int64, Int64) -__OMP_RTL(__tgt_target, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_nowait, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_teams, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr, Int32, Int32) -__OMP_RTL(__tgt_target_teams_nowait, false, Int32, Int64, VoidPtr, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, Int32, Int32) +__OMP_RTL(__tgt_target_mapper, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_nowait_mapper, false, Int32, Int64, VoidPtr, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_teams_mapper, false, Int32, Int64, VoidPtr, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, Int32, Int32) +__OMP_RTL(__tgt_target_teams_nowait_mapper, false, Int32, Int64, VoidPtr, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, Int32, Int32) __OMP_RTL(__tgt_register_requires, false, Void, Int64) -__OMP_RTL(__tgt_target_data_begin, 
false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_begin_nowait, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_end, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_end_nowait, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_update, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_update_nowait, false, Void, Int64, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr) +__OMP_RTL(__tgt_target_data_begin_mapper, false, Void, Int64, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_end_mapper, false, Void, Int64, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_update_mapper, false, Void, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr) __OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr) __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, Int64, Int64) __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// Note that device runtime functions (in the following) do not necessarily +/// need attributes as we expect to see the definitions. 
+__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL #undef OMP_RTL +#define ParamAttrs(...) ArrayRef({__VA_ARGS__}) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) +#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -602,18 +609,93 @@ __OMP_RTL(__last, false, Void, ) __OMP_ATTRS_SET(GetterAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(GetterArgWriteAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), EnumAttr(InaccessibleMemOrArgMemOnly)) + EnumAttr(NoFree), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(SetterAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(DefaultAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(BarrierAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, + OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +#if 0 +__OMP_ATTRS_SET(InaccessibleOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) : AttributeSet(EnumAttr(NoUnwind))) +#endif + +__OMP_ATTRS_SET(AllocAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ForkAttrs, OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ReadOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) + +#if 0 +__OMP_ATTRS_SET(WriteOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) +#endif + +__OMP_ATTRS_SET(ArgPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoCapture), EnumAttr(NoFree)) + : AttributeSet()) + +__OMP_ATTRS_SET(ReturnPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoAlias)) + : AttributeSet()) + +#if 0 +__OMP_ATTRS_SET(ReturnAlignedPtrAttrs, + OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoAlias), EnumAttrInt(Alignment, 8), + EnumAttrInt(DereferenceableOrNull, 8)) + : AttributeSet()) +#endif #undef __OMP_ATTRS_SET #undef OMP_ATTRS_SET @@ -625,295 +707,309 @@ __OMP_ATTRS_SET(SetterAttrs, #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) -__OMP_RTL_ATTRS(__kmpc_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_flush, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskwait, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskyield, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_num_threads, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_proc_bind, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, 
AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), - ArrayRef( - {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), - AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_fork_call, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet(), ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_threads, 
GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS( + omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), + AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(), - {}) -__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {}) + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, 
AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(), - ArrayRef({AttributeSet(), - AttributeSet(EnumAttr(NoCapture), - EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical_with_hint, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_begin, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - 
-__OMP_RTL_ATTRS(__kmpc_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_fini, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_single, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_single, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__kmpc_taskloop, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancellationpoint, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_fork_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_num_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_copyprivate, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_register, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_doacross_init, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_post, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_wait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_fini, AttributeSet(EnumAttr(NoUnwind)), - 
AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_free, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_init_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_register_requires, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_mapper_num_components, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_push_mapper_component, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) + ParamAttrs(AttributeSet(), AttributeSet(EnumAttr(NoCapture), + 
EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_critical_with_hint, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_begin, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), 
AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_for_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, GetterArgWriteAttrs, + AttributeSet(), + 
ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, 
AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskgroup, BarrierAttrs, AttributeSet(), + 
ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskloop, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskred_init, DefaultAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, BarrierAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, DefaultAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_fork_teams, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), 
ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_teams, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_copyprivate, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_register, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + ReadOnlyPtrAttrs, ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_doacross_init, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_post, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, + AttributeSet(), {}) 
+__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs)) #undef __OMP_RTL_ATTRS #undef OMP_RTL_ATTRS #undef AttributeSet #undef EnumAttr +#undef EnumAttrInt +#undef ParamAttrs ///} @@ -977,6 +1073,7 @@ __OMP_CANCEL_KIND(taskgroup, 4) __OMP_DEFAULT_KIND(none) __OMP_DEFAULT_KIND(shared) +__OMP_DEFAULT_KIND(firstprivate) __OMP_DEFAULT_KIND(unknown) #undef __OMP_DEFAULT_KIND diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index af469e8a5d1aa..2ca18c6103495 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -72,8 +72,9 @@ class Argument final : public Value { bool hasSwiftErrorAttr() const; /// Return true if this argument has the byval, inalloca, or preallocated - /// attribute. These attributes represent arguments being passed by value. - bool hasPassPointeeByValueAttr() const; + /// attribute. These attributes represent arguments being passed by value, + /// with an associated copy between the caller and callee + bool hasPassPointeeByValueCopyAttr() const; /// If this argument satisfies has hasPassPointeeByValueAttr, return the /// in-memory ABI size copied to the stack for the call. Otherwise, return 0. 
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 3579c9f1ee332..8e2dba9b2417c 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -308,6 +308,7 @@ class ConstantFP final : public ConstantData { /// Return true if Ty is big enough to represent V. static bool isValueValidForType(Type *Ty, const APFloat &V); inline const APFloat &getValueAPF() const { return Val; } + inline const APFloat &getValue() const { return Val; } /// Return true if the value is positive or negative zero. bool isZero() const { return Val.isZero(); } diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index a8e8a7915b2a4..b7e0ecde8629e 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -213,7 +213,7 @@ class DiagnosticInfoResourceLimit : public DiagnosticInfo { }; class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit { - virtual void anchor() override; + void anchor() override; public: DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize, DiagnosticSeverity Severity = DS_Warning, @@ -364,7 +364,7 @@ class DiagnosticLocation { /// Common features for diagnostics with an associated location. class DiagnosticInfoWithLocationBase : public DiagnosticInfo { - virtual void anchor() override; + void anchor() override; public: /// \p Fn is the function where the diagnostic is being emitted. \p Loc is /// the location information to use in the diagnostic. @@ -611,7 +611,7 @@ operator<<(RemarkT &R, /// Common features for diagnostics dealing with optimization remarks /// that are used by IR passes. class DiagnosticInfoIROptimization : public DiagnosticInfoOptimizationBase { - virtual void anchor() override; + void anchor() override; public: /// \p PassName is the name of the pass emitting this diagnostic. 
\p /// RemarkName is a textual identifier for the remark (single-word, @@ -832,7 +832,7 @@ class OptimizationRemarkAnalysis : public DiagnosticInfoIROptimization { /// Diagnostic information for optimization analysis remarks related to /// floating-point non-commutativity. class OptimizationRemarkAnalysisFPCommute : public OptimizationRemarkAnalysis { - virtual void anchor(); + void anchor() override; public: /// \p PassName is the name of the pass emitting this diagnostic. If this name /// matches the regular expression given in -Rpass-analysis=, then the @@ -874,7 +874,7 @@ class OptimizationRemarkAnalysisFPCommute : public OptimizationRemarkAnalysis { /// Diagnostic information for optimization analysis remarks related to /// pointer aliasing. class OptimizationRemarkAnalysisAliasing : public OptimizationRemarkAnalysis { - virtual void anchor(); + void anchor() override; public: /// \p PassName is the name of the pass emitting this diagnostic. If this name /// matches the regular expression given in -Rpass-analysis=, then the diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index ee66abc3eaed6..bb4ec13c7610f 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -830,9 +830,11 @@ class Function : public GlobalObject, public ilist_node { /// hasAddressTaken - returns true if there are any uses of this function /// other than direct calls or invokes to it, or blockaddress expressions. - /// Optionally passes back an offending user for diagnostic purposes. + /// Optionally passes back an offending user for diagnostic purposes and + /// ignores callback uses. 
/// - bool hasAddressTaken(const User** = nullptr) const; + bool hasAddressTaken(const User ** = nullptr, + bool IgnoreCallbackUses = false) const; /// isDefTriviallyDead - Return true if it is trivially safe to remove /// this function definition from the module (because it isn't externally diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 94741229a2a75..4918ea876df65 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -458,6 +458,9 @@ def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], [], "llvm.write_register">; +def int_read_volatile_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], + [IntrHasSideEffects], + "llvm.read_volatile_register">; // Gets the address of the local variable area. This is typically a copy of the // stack, frame, or base pointer depending on the type of prologue. 
@@ -1458,7 +1461,7 @@ def int_matrix_multiply def int_matrix_column_major_load : Intrinsic<[llvm_anyvector_ty], - [LLVMAnyPointerType>, llvm_i64_ty, llvm_i1_ty, + [LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem, NoCapture>, ImmArg>, ImmArg>, @@ -1466,7 +1469,7 @@ def int_matrix_column_major_load def int_matrix_column_major_store : Intrinsic<[], - [llvm_anyvector_ty, LLVMAnyPointerType>, + [llvm_anyvector_ty, LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem, WriteOnly>, NoCapture>, diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 12f4a3ce8e28f..2abb6b4e55fe7 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -468,27 +468,27 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // P10 Vector Insert. 
def int_ppc_altivec_vinsblx : GCCBuiltin<"__builtin_altivec_vinsblx">, Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinsbrx : GCCBuiltin<"__builtin_altivec_vinsbrx">, Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinshlx : GCCBuiltin<"__builtin_altivec_vinshlx">, Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinshrx : GCCBuiltin<"__builtin_altivec_vinshrx">, Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinswlx : GCCBuiltin<"__builtin_altivec_vinswlx">, Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinswrx : GCCBuiltin<"__builtin_altivec_vinswrx">, Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinsdlx : GCCBuiltin<"__builtin_altivec_vinsdlx">, Intrinsic<[llvm_v2i64_ty], @@ -525,7 +525,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // P10 Vector Insert with immediate. 
def int_ppc_altivec_vinsw : Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_i32_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_ppc_altivec_vinsd : Intrinsic<[llvm_v2i64_ty], @@ -1067,6 +1067,9 @@ def int_ppc_vsx_xxinsertw : PowerPC_VSX_Intrinsic<"xxinsertw",[llvm_v4i32_ty], [llvm_v4i32_ty,llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtlsbb : + PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty], + [llvm_v16i8_ty, llvm_i1_ty], [IntrNoMem]>; def int_ppc_vsx_xxeval : PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h index 515b86ba1e78f..5d04b3563dd5d 100644 --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -69,7 +69,7 @@ template class MatrixBuilder { Value *Ops[] = {DataPtr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows), B.getInt32(Columns)}; - Type *OverloadedTypes[] = {RetType, PtrTy}; + Type *OverloadedTypes[] = {RetType}; Function *TheFn = Intrinsic::getDeclaration( getModule(), Intrinsic::matrix_column_major_load, OverloadedTypes); @@ -92,7 +92,7 @@ template class MatrixBuilder { Value *Ops[] = {Matrix, Ptr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows), B.getInt32(Columns)}; - Type *OverloadedTypes[] = {Matrix->getType(), Ptr->getType()}; + Type *OverloadedTypes[] = {Matrix->getType()}; Function *TheFn = Intrinsic::getDeclaration( getModule(), Intrinsic::matrix_column_major_store, OverloadedTypes); diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 4cfd4e916200b..12a829b14e36a 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -894,7 +894,8 @@ struct TypeTestResolution { Single, ///< Single element (last example in "Short Inline Bit Vectors") AllOnes, ///< All-ones bit vector ("Eliminating Bit Vector Checks for /// All-Ones Bit Vectors") 
- } TheKind = Unsat; + Unknown, ///< Unknown (analysis not performed, don't lower) + } TheKind = Unknown; /// Range of size-1 expressed as a bit width. For example, if the size is in /// range [1,256], this number will be 8. This helps generate the most compact @@ -1092,7 +1093,7 @@ class ModuleSummaryIndex { // in the way some record are interpreted, like flags for instance. // Note that incrementing this may require changes in both BitcodeReader.cpp // and BitcodeWriter.cpp. - static constexpr uint64_t BitcodeSummaryVersion = 8; + static constexpr uint64_t BitcodeSummaryVersion = 9; // Regular LTO module name for ASM writer static constexpr const char *getRegularLTOModuleName() { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index 756388ce54988..f7fa16df11003 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -17,6 +17,7 @@ namespace yaml { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, TypeTestResolution::Kind &value) { + io.enumCase(value, "Unknown", TypeTestResolution::Unknown); io.enumCase(value, "Unsat", TypeTestResolution::Unsat); io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray); io.enumCase(value, "Inline", TypeTestResolution::Inline); diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index bcc434548e670..37390e4e682ba 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -129,6 +129,26 @@ class PassInstrumentationCallbacks { class PassInstrumentation { PassInstrumentationCallbacks *Callbacks; + // Template argument PassT of PassInstrumentation::runBeforePass could be two + // kinds: (1) a regular pass inherited from PassInfoMixin (happens when + // creating an adaptor pass for a regular pass); (2) a type-erased PassConcept + // created from (1). 
Here we want to make case (1) skippable unconditionally + since they are regular passes. We call PassConcept::isRequired to decide + for case (2). + template + using has_required_t = decltype(std::declval().isRequired()); + + template + static std::enable_if_t::value, bool> + isRequired(const PassT &Pass) { + return Pass.isRequired(); + } + template + static std::enable_if_t::value, bool> + isRequired(const PassT &Pass) { + return false; + } + public: /// Callbacks object is not owned by PassInstrumentation, its life-time /// should at least match the life-time of corresponding @@ -148,6 +168,7 @@ class PassInstrumentation { bool ShouldRun = true; for (auto &C : Callbacks->BeforePassCallbacks) ShouldRun &= C(Pass.name(), llvm::Any(&IR)); + ShouldRun = ShouldRun || isRequired(Pass); return ShouldRun; } diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 4d5f292ba9a13..f503871e23609 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -559,6 +559,8 @@ class PassManager : public PassInfoMixin< Passes.emplace_back(new PassModelT(std::move(Pass))); } + static bool isRequired() { return true; } + private: using PassConceptT = detail::PassConcept; @@ -1260,6 +1262,8 @@ class ModuleToFunctionPassAdaptor return PA; } + static bool isRequired() { return true; } + private: FunctionPassT Pass; }; diff --git a/llvm/include/llvm/IR/PassManagerInternal.h b/llvm/include/llvm/IR/PassManagerInternal.h index c602c0b5cc20a..986ed0b5a7ac6 100644 --- a/llvm/include/llvm/IR/PassManagerInternal.h +++ b/llvm/include/llvm/IR/PassManagerInternal.h @@ -48,6 +48,12 @@ struct PassConcept { /// Polymorphic method to access the name of a pass. virtual StringRef name() const = 0; + + /// Polymorphic method to let a pass optionally be exempted from skipping by + /// PassInstrumentation. + /// To opt-in, pass should implement `static bool isRequired()`. 
It's a no-op + /// to have `isRequired` always return false since that is the default. + virtual bool isRequired() const = 0; }; /// A template wrapper used to implement the polymorphic API. @@ -81,6 +87,22 @@ struct PassModel : PassConcept { StringRef name() const override { return PassT::name(); } + template + using has_required_t = decltype(std::declval().isRequired()); + + template + static std::enable_if_t::value, bool> + passIsRequiredImpl() { + return T::isRequired(); + } + template + static std::enable_if_t::value, bool> + passIsRequiredImpl() { + return false; + } + + bool isRequired() const override { return passIsRequiredImpl(); } + + PassT Pass; }; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 98182bc3d85d9..4c11bc82510b4 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -262,17 +262,23 @@ template inline constantint_match m_ConstantInt() { return constantint_match(); } -/// This helper class is used to match scalar and fixed width vector integer -/// constants that satisfy a specified predicate. -/// For vector constants, undefined elements are ignored. -template struct cst_pred_ty : public Predicate { +/// This helper class is used to match constant scalars, vector splats, +/// and fixed width vectors that satisfy a specified predicate. +/// For fixed width vector constants, undefined elements are ignored. 
+template +struct cstval_pred_ty : public Predicate { template bool match(ITy *V) { - if (const auto *CI = dyn_cast(V)) - return this->isValue(CI->getValue()); - if (const auto *FVTy = dyn_cast(V->getType())) { + if (const auto *CV = dyn_cast(V)) + return this->isValue(CV->getValue()); + if (const auto *VTy = dyn_cast(V->getType())) { if (const auto *C = dyn_cast(V)) { - if (const auto *CI = dyn_cast_or_null(C->getSplatValue())) - return this->isValue(CI->getValue()); + if (const auto *CV = dyn_cast_or_null(C->getSplatValue())) + return this->isValue(CV->getValue()); + + // Number of elements of a scalable vector unknown at compile time + auto *FVTy = dyn_cast(VTy); + if (!FVTy) + return false; // Non-splat vector constant: check each element for a match. unsigned NumElts = FVTy->getNumElements(); @@ -284,8 +290,8 @@ template struct cst_pred_ty : public Predicate { return false; if (isa(Elt)) continue; - auto *CI = dyn_cast(Elt); - if (!CI || !this->isValue(CI->getValue())) + auto *CV = dyn_cast(Elt); + if (!CV || !this->isValue(CV->getValue())) return false; HasNonUndefElements = true; } @@ -296,6 +302,14 @@ template struct cst_pred_ty : public Predicate { } }; +/// specialization of cstval_pred_ty for ConstantInt +template +using cst_pred_ty = cstval_pred_ty; + +/// specialization of cstval_pred_ty for ConstantFP +template +using cstfp_pred_ty = cstval_pred_ty; + /// This helper class is used to match scalar and vector constants that /// satisfy a specified predicate, and bind them to an APInt. template struct api_pred_ty : public Predicate { @@ -321,44 +335,6 @@ template struct api_pred_ty : public Predicate { } }; -/// This helper class is used to match scalar and vector floating-point -/// constants that satisfy a specified predicate. -/// For vector constants, undefined elements are ignored. 
-template struct cstfp_pred_ty : public Predicate { - template bool match(ITy *V) { - if (const auto *CF = dyn_cast(V)) - return this->isValue(CF->getValueAPF()); - if (V->getType()->isVectorTy()) { - if (const auto *C = dyn_cast(V)) { - if (const auto *CF = dyn_cast_or_null(C->getSplatValue())) - return this->isValue(CF->getValueAPF()); - - // Number of elements of a scalable vector unknown at compile time - if (isa(V->getType())) - return false; - - // Non-splat vector constant: check each element for a match. - unsigned NumElts = cast(V->getType())->getNumElements(); - assert(NumElts != 0 && "Constant vector with no elements?"); - bool HasNonUndefElements = false; - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = C->getAggregateElement(i); - if (!Elt) - return false; - if (isa(Elt)) - continue; - auto *CF = dyn_cast(Elt); - if (!CF || !this->isValue(CF->getValueAPF())) - return false; - HasNonUndefElements = true; - } - return HasNonUndefElements; - } - } - return false; - } -}; - /////////////////////////////////////////////////////////////////////////////// // // Encapsulate constant value queries for use in templated predicate matchers. 
diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h index faf5e330afcf0..8f78b99d37949 100644 --- a/llvm/include/llvm/MC/MCELFObjectWriter.h +++ b/llvm/include/llvm/MC/MCELFObjectWriter.h @@ -65,7 +65,7 @@ class MCELFObjectTargetWriter : public MCObjectTargetWriter { public: virtual ~MCELFObjectTargetWriter() = default; - virtual Triple::ObjectFormatType getFormat() const { return Triple::ELF; } + Triple::ObjectFormatType getFormat() const override { return Triple::ELF; } static bool classof(const MCObjectTargetWriter *W) { return W->getFormat() == Triple::ELF; } diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h index bff8808cf4ffd..38ba68b78fe13 100644 --- a/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -45,7 +45,7 @@ class MCMachObjectTargetWriter : public MCObjectTargetWriter { public: virtual ~MCMachObjectTargetWriter(); - virtual Triple::ObjectFormatType getFormat() const { return Triple::MachO; } + Triple::ObjectFormatType getFormat() const override { return Triple::MachO; } static bool classof(const MCObjectTargetWriter *W) { return W->getFormat() == Triple::MachO; } diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index f37f2ad760029..c3f3ae5de921e 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -171,9 +171,9 @@ class MCObjectStreamer : public MCStreamer { void emitTPRel64Value(const MCExpr *Value) override; void emitGPRel32Value(const MCExpr *Value) override; void emitGPRel64Value(const MCExpr *Value) override; - bool emitRelocDirective(const MCExpr &Offset, StringRef Name, - const MCExpr *Expr, SMLoc Loc, - const MCSubtargetInfo &STI) override; + Optional> + emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, + SMLoc Loc, const MCSubtargetInfo &STI) override; using MCStreamer::emitFill; void 
emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc = SMLoc()) override; diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index 2040089759594..a68066e0f50b5 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -170,8 +170,12 @@ class MCAsmParser { virtual bool isParsingMasm() const { return false; } - virtual bool LookUpFieldOffset(StringRef Base, StringRef Member, - unsigned &Offset) { + virtual bool lookUpField(StringRef Name, StringRef &Type, + unsigned &Offset) const { + return true; + } + virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, + unsigned &Offset) const { return true; } diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index d7255a22e9415..484c62538366e 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1014,13 +1014,12 @@ class MCStreamer { virtual void emitSyntaxDirective(); - /// Emit a .reloc directive. - /// Returns true if the relocation could not be emitted because Name is not - /// known. - virtual bool emitRelocDirective(const MCExpr &Offset, StringRef Name, - const MCExpr *Expr, SMLoc Loc, - const MCSubtargetInfo &STI) { - return true; + /// Record a relocation described by the .reloc directive. Return None if + /// succeeded. Otherwise, return a pair (Name is invalid, error message). 
+ virtual Optional> + emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, + SMLoc Loc, const MCSubtargetInfo &STI) { + return None; } virtual void emitAddrsig() {} diff --git a/llvm/include/llvm/MC/MCWasmObjectWriter.h b/llvm/include/llvm/MC/MCWasmObjectWriter.h index fbb68549b5037..382818ad6867a 100644 --- a/llvm/include/llvm/MC/MCWasmObjectWriter.h +++ b/llvm/include/llvm/MC/MCWasmObjectWriter.h @@ -28,7 +28,7 @@ class MCWasmObjectTargetWriter : public MCObjectTargetWriter { public: virtual ~MCWasmObjectTargetWriter(); - virtual Triple::ObjectFormatType getFormat() const { return Triple::Wasm; } + Triple::ObjectFormatType getFormat() const override { return Triple::Wasm; } static bool classof(const MCObjectTargetWriter *W) { return W->getFormat() == Triple::Wasm; } diff --git a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h index 3fe124fd7f1c4..3015efe7389e4 100644 --- a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h +++ b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h @@ -31,7 +31,7 @@ class raw_pwrite_stream; public: virtual ~MCWinCOFFObjectTargetWriter() = default; - virtual Triple::ObjectFormatType getFormat() const { return Triple::COFF; } + Triple::ObjectFormatType getFormat() const override { return Triple::COFF; } static bool classof(const MCObjectTargetWriter *W) { return W->getFormat() == Triple::COFF; } diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index c7f2a8e709f0b..b44dd3f486614 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -205,16 +205,18 @@ class ELFFile { if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr)) return createError("invalid e_phentsize: " + Twine(getHeader()->e_phentsize)); - if (getHeader()->e_phoff + - (getHeader()->e_phnum * getHeader()->e_phentsize) > - getBufSize()) + + uint64_t HeadersSize = + (uint64_t)getHeader()->e_phnum * getHeader()->e_phentsize; + uint64_t PhOff = 
getHeader()->e_phoff; + if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize()) return createError("program headers are longer than binary of size " + Twine(getBufSize()) + ": e_phoff = 0x" + Twine::utohexstr(getHeader()->e_phoff) + ", e_phnum = " + Twine(getHeader()->e_phnum) + ", e_phentsize = " + Twine(getHeader()->e_phentsize)); - auto *Begin = - reinterpret_cast(base() + getHeader()->e_phoff); + + auto *Begin = reinterpret_cast(base() + PhOff); return makeArrayRef(Begin, Begin + getHeader()->e_phnum); } diff --git a/llvm/include/llvm/Object/Error.h b/llvm/include/llvm/Object/Error.h index 1e109fa131c8a..07744188444ac 100644 --- a/llvm/include/llvm/Object/Error.h +++ b/llvm/include/llvm/Object/Error.h @@ -51,7 +51,7 @@ inline std::error_code make_error_code(object_error e) { /// Currently inherits from ECError for easy interoperability with /// std::error_code, but this will be removed in the future. class BinaryError : public ErrorInfo { - virtual void anchor(); + void anchor() override; public: static char ID; BinaryError() { diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index dc90c891ab95f..05a04af347fc3 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -282,6 +282,7 @@ class WasmObjectFile : public ObjectFile { bool HasLinkingSection = false; bool HasDylinkSection = false; bool SeenCodeSection = false; + bool HasMemory64 = false; wasm::WasmLinkingData LinkingData; uint32_t NumImportedGlobals = 0; uint32_t NumImportedFunctions = 0; diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h index 0ec3f90e1686a..1552ec7c178e1 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h +++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h @@ -38,6 +38,7 @@ Error emitPubSection(raw_ostream &OS, const PubSection &Sect, Error emitDebugInfo(raw_ostream &OS, const Data &DI); Error emitDebugLine(raw_ostream &OS, const Data &DI); Error 
emitDebugAddr(raw_ostream &OS, const Data &DI); +Error emitDebugStrOffsets(raw_ostream &OS, const Data &DI); Expected>> emitDebugSections(StringRef YAMLString, bool ApplyFixups = false, diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 259152ff5d035..244a5ff19298e 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -177,11 +177,20 @@ struct AddrTableEntry { std::vector SegAddrPairs; }; +struct StringOffsetsTable { + dwarf::DwarfFormat Format; + Optional Length; + yaml::Hex16 Version; + yaml::Hex16 Padding; + std::vector Offsets; +}; + struct Data { bool IsLittleEndian; - bool Is64bit; + bool Is64BitAddrSize; std::vector AbbrevDecls; std::vector DebugStrings; + Optional> DebugStrOffsets; std::vector ARanges; std::vector DebugRanges; std::vector DebugAddr; @@ -218,6 +227,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LineTable) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LineTableOpcode) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::SegAddrPair) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AddrTableEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::StringOffsetsTable) namespace llvm { namespace yaml { @@ -290,6 +300,10 @@ template <> struct MappingTraits { static void mapping(IO &IO, DWARFYAML::AddrTableEntry &AddrTable); }; +template <> struct MappingTraits { + static void mapping(IO &IO, DWARFYAML::StringOffsetsTable &StrOffsetsTable); +}; + template <> struct MappingTraits { static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); }; diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index e7a2411a3e2aa..b1ffb20681ea8 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -81,10 +81,13 @@ struct FileHeader { ELF_EF Flags; llvm::yaml::Hex64 Entry; - Optional SHEntSize; - Optional SHOff; - Optional SHNum; - Optional SHStrNdx; + Optional EPhOff; + Optional 
EPhEntSize; + Optional EPhNum; + Optional EShEntSize; + Optional EShOff; + Optional EShNum; + Optional EShStrNdx; }; struct SectionHeader { @@ -94,7 +97,7 @@ struct SectionHeader { struct SectionHeaderTable { Optional> Sections; Optional> Excluded; - bool NoHeaders; + Optional NoHeaders; }; struct SectionName { diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 74bfadcba7267..9ce7839781859 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -412,6 +412,10 @@ class InputArgList final : public ArgList { return ArgStrings[Index]; } + void replaceArgString(unsigned Index, const Twine &S) { + ArgStrings[Index] = MakeArgString(S); + } + unsigned getNumInputArgStrings() const override { return NumInputArgStrings; } diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 5db30436069db..b9984bed55a7b 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -59,6 +59,7 @@ class OptTable { /// The option information table. std::vector OptionInfos; bool IgnoreCase; + bool GroupedShortOptions = false; unsigned TheInputOptionID = 0; unsigned TheUnknownOptionID = 0; @@ -79,6 +80,8 @@ class OptTable { return OptionInfos[id - 1]; } + Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const; + protected: OptTable(ArrayRef OptionInfos, bool IgnoreCase = false); @@ -120,6 +123,9 @@ class OptTable { return getInfo(id).MetaVar; } + /// Support grouped short options. e.g. -ab represents -a -b. + void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; } + /// Find possible value for given flags. This is used for shell /// autocompletion. 
/// diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 73ee8e0073b85..196cf656355de 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -213,14 +213,16 @@ class Option { /// Index to the position where argument parsing should resume /// (even if the argument is missing values). /// - /// \param ArgSize The number of bytes taken up by the matched Option prefix - /// and name. This is used to determine where joined values - /// start. - Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const; + /// \p CurArg The argument to be matched. It may be shorter than the + /// underlying storage to represent a Joined argument. + /// \p GroupedShortOption If true, we are handling the fallback case of + /// parsing a prefix of the current argument as a short option. + Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption, + unsigned &Index) const; private: - Arg *acceptInternal(const ArgList &Args, unsigned &Index, - unsigned ArgSize) const; + Arg *acceptInternal(const ArgList &Args, StringRef CurArg, + unsigned &Index) const; public: void print(raw_ostream &O) const; diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index f9b0d939e5f8c..b0703457656b2 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -472,10 +472,21 @@ class PassBuilder { /// module(function(loop(lpass1,lpass2,lpass3))) /// /// This shortcut is especially useful for debugging and testing small pass - /// combinations. Note that these shortcuts don't introduce any other magic. - /// If the sequence of passes aren't all the exact same kind of pass, it will - /// be an error. You cannot mix different levels implicitly, you must - /// explicitly form a pass manager in which to nest passes. + /// combinations. + /// + /// The sequence of passes aren't necessarily the exact same kind of pass. 
+ /// You can mix different levels implicitly if adaptor passes are defined to + /// make them work. For example, + /// + /// mpass1,fpass1,fpass2,mpass2,lpass1 + /// + /// This pipeline uses only one pass manager: the top-level module manager. + /// fpass1,fpass2 and lpass1 are added into the the top-level module manager + /// using only adaptor passes. No nested function/loop pass managers are + /// added. The purpose is to allow easy pass testing when the user + /// specifically want the pass to run under a adaptor directly. This is + /// preferred when a pipeline is largely of one type, but one or just a few + /// passes are of different types(See PassBuilder.cpp for examples). Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, bool VerifyEachPass = true, bool DebugLogging = false); @@ -518,6 +529,9 @@ class PassBuilder { /// Returns true if the pass name is the name of an alias analysis pass. bool isAAPassName(StringRef PassName); + /// Returns true if the pass name is the name of a (non-alias) analysis pass. + bool isAnalysisPassName(StringRef PassName); + /// Register a callback for a default optimizer pipeline extension /// point /// diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 62a0c6955708e..3d9fca9422c41 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -75,10 +75,18 @@ inline StringRef getInstrProfValueProfFuncName() { } /// Return the name profile runtime entry point to do value range profiling. +// FIXME: This is to be removed after switching to the new memop value +// profiling. inline StringRef getInstrProfValueRangeProfFuncName() { return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; } +/// Return the name profile runtime entry point to do memop size value +/// profiling. 
+inline StringRef getInstrProfValueProfMemOpFuncName() { + return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR; +} + /// Return the name prefix of variables containing instrumented function names. inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index a6913527e67f0..6d0ffb12294b0 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -157,6 +157,8 @@ VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA #ifndef VALUE_RANGE_PROF VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) #else /* VALUE_RANGE_PROF */ +/* FIXME: This is to be removed after switching to the new memop value + * profiling. */ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \ INSTR_PROF_COMMA VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \ @@ -753,9 +755,14 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target #define INSTR_PROF_VALUE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) +/* FIXME: This is to be removed after switching to the new memop value + * profiling. */ #define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range #define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC __llvm_profile_instrument_memop +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_MEMOP_FUNC) /* InstrProfile per-function control data alignment. 
*/ #define INSTR_PROF_DATA_ALIGNMENT 8 @@ -783,3 +790,121 @@ typedef struct InstrProfValueData { #endif #undef COVMAP_V2_OR_V3 + +#ifdef INSTR_PROF_VALUE_PROF_MEMOP_API + +#ifdef __cplusplus +#define INSTR_PROF_INLINE inline +#else +#define INSTR_PROF_INLINE +#endif + +/* The value range buckets (22 buckets) for the memop size value profiling looks + * like: + * + * [0, 0] + * [1, 1] + * [2, 2] + * [3, 3] + * [4, 4] + * [5, 5] + * [6, 6] + * [7, 7] + * [8, 8] + * [9, 15] + * [16, 16] + * [17, 31] + * [32, 32] + * [33, 63] + * [64, 64] + * [65, 127] + * [128, 128] + * [129, 255] + * [256, 256] + * [257, 511] + * [512, 512] + * [513, UINT64_MAX] + * + * Each range has a 'representative value' which is the lower end value of the + * range and used to store in the runtime profile data records and the VP + * metadata. For example, it's 2 for [2, 2] and 64 for [65, 127]. + */ + +/* + * Clz and Popcount. This code was copied from + * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and + * llvm/include/llvm/Support/MathExtras.h. + */ +#if defined(_MSC_VER) && !defined(__clang__) + +#include +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { + unsigned long LeadZeroIdx = 0; +#if !defined(_M_ARM64) && !defined(_M_X64) + // Scan the high 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32))) + return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset + // from the MSB. + // Scan the low 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X))) + return (int)(63 - LeadZeroIdx); +#else + if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx; +#endif + return 64; +} +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { + // This code originates from https://reviews.llvm.org/rG30626254510f. 
+ unsigned long long v = X; + v = v - ((v >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); + v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56); +} + +#else + +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { return __builtin_clzll(X); } +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); } + +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +/* Map an (observed) memop size value to the representative value of its range. + * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t +InstrProfGetRangeRepValue(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. Use the value as is. + return Value; + else if (Value >= 513) + // The last range is mapped to its lowest value. + return 513; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, use it as is. + return Value; + else + // Otherwise, take to the previous power of two + 1. + return (1 << (64 - InstProfClzll(Value) - 1)) + 1; +} + +/* Return true if the range that an (observed) memop size value belongs to has + * only a single value in the range. For example, 0 -> true, 8 -> true, 10 -> + * false, 64 -> true, 100 -> false, 513 -> false. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned +InstrProfIsSingleValRange(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. + return 1; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, there's only one value. + return 1; + else + // Otherwise, there's more than one value in the range. 
+ return 0; +} + +#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */ diff --git a/llvm/include/llvm/Support/FormatAdapters.h b/llvm/include/llvm/Support/FormatAdapters.h index acf50724d3e3b..495205d11748b 100644 --- a/llvm/include/llvm/Support/FormatAdapters.h +++ b/llvm/include/llvm/Support/FormatAdapters.h @@ -34,7 +34,7 @@ template class AlignAdapter final : public FormatAdapter { : FormatAdapter(std::forward(Item)), Where(Where), Amount(Amount), Fill(Fill) {} - void format(llvm::raw_ostream &Stream, StringRef Style) { + void format(llvm::raw_ostream &Stream, StringRef Style) override { auto Adapter = detail::build_format_adapter(std::forward(this->Item)); FmtAlign(Adapter, Where, Amount, Fill).format(Stream, Style); } @@ -48,7 +48,7 @@ template class PadAdapter final : public FormatAdapter { PadAdapter(T &&Item, size_t Left, size_t Right) : FormatAdapter(std::forward(Item)), Left(Left), Right(Right) {} - void format(llvm::raw_ostream &Stream, StringRef Style) { + void format(llvm::raw_ostream &Stream, StringRef Style) override { auto Adapter = detail::build_format_adapter(std::forward(this->Item)); Stream.indent(Left); Adapter.format(Stream, Style); @@ -63,7 +63,7 @@ template class RepeatAdapter final : public FormatAdapter { RepeatAdapter(T &&Item, size_t Count) : FormatAdapter(std::forward(Item)), Count(Count) {} - void format(llvm::raw_ostream &Stream, StringRef Style) { + void format(llvm::raw_ostream &Stream, StringRef Style) override { auto Adapter = detail::build_format_adapter(std::forward(this->Item)); for (size_t I = 0; I < Count; ++I) { Adapter.format(Stream, Style); @@ -76,7 +76,9 @@ class ErrorAdapter : public FormatAdapter { ErrorAdapter(Error &&Item) : FormatAdapter(std::move(Item)) {} ErrorAdapter(ErrorAdapter &&) = default; ~ErrorAdapter() { consumeError(std::move(Item)); } - void format(llvm::raw_ostream &Stream, StringRef Style) { Stream << Item; } + void format(llvm::raw_ostream &Stream, StringRef Style) override { + Stream << Item; + } }; } diff 
--git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def new file mode 100644 index 0000000000000..28de6cd40132e --- /dev/null +++ b/llvm/include/llvm/Support/RISCVTargetParser.def @@ -0,0 +1,13 @@ +#ifndef PROC +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) +#endif + +PROC(INVALID, {"invalid"}, FK_INVALID, {""}) +PROC(GENERIC_RV32, {"generic-rv32"}, FK_NONE, {""}) +PROC(GENERIC_RV64, {"generic-rv64"}, FK_64BIT, {""}) +PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""}) +PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""}) +PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"}) +PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"}) + +#undef PROC diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index a0bd88c153b6e..f521d8f836b4b 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -130,6 +130,32 @@ IsaVersion getIsaVersion(StringRef GPU); } // namespace AMDGPU +namespace RISCV { + +enum CPUKind : unsigned { +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM, +#include "RISCVTargetParser.def" +}; + +enum FeatureKind : unsigned { + FK_INVALID = 0, + FK_NONE = 1, + FK_STDEXTM = 1 << 2, + FK_STDEXTA = 1 << 3, + FK_STDEXTF = 1 << 4, + FK_STDEXTD = 1 << 5, + FK_STDEXTC = 1 << 6, + FK_64BIT = 1 << 7, +}; + +bool checkCPUKind(CPUKind Kind, bool IsRV64); +CPUKind parseCPUKind(StringRef CPU); +StringRef getMArchFromMcpu(StringRef CPU); +void fillValidCPUArchList(SmallVectorImpl &Values, bool IsRV64); +bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector &Features); + +} // namespace RISCV + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 4b96c66b0e290..697f8c70f962d 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -20,106 +20,70 @@ X86_VENDOR(VENDOR_AMD, 
"amd") #undef X86_VENDOR // This macro is used for cpu types present in compiler-rt/libgcc. -#ifndef X86_CPU_TYPE_COMPAT -#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) X86_CPU_TYPE(ARCHNAME, ENUM) -#endif - #ifndef X86_CPU_TYPE -#define X86_CPU_TYPE(ARCHNAME, ENUM) +#define X86_CPU_TYPE(ENUM, STR) #endif -#ifndef X86_CPU_TYPE_COMPAT_ALIAS -#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, STR) +#ifndef X86_CPU_TYPE_ALIAS +#define X86_CPU_TYPE_ALIAS(ENUM, STR) #endif -// The first part of this list must match what is implemented in libgcc and -// compilert-rt. Clang uses this to know how to implement __builtin_cpu_is. -X86_CPU_TYPE_COMPAT("bonnell", INTEL_BONNELL, "bonnell") -X86_CPU_TYPE_COMPAT("core2", INTEL_CORE2, "core2") -X86_CPU_TYPE_COMPAT("nehalem", INTEL_COREI7, "corei7") -X86_CPU_TYPE_COMPAT("amdfam10", AMDFAM10H, "amdfam10h") -X86_CPU_TYPE_COMPAT("bdver1", AMDFAM15H, "amdfam15h") -X86_CPU_TYPE_COMPAT("silvermont", INTEL_SILVERMONT, "silvermont") -X86_CPU_TYPE_COMPAT("knl", INTEL_KNL, "knl") -X86_CPU_TYPE_COMPAT("btver1", AMD_BTVER1, "btver1") -X86_CPU_TYPE_COMPAT("btver2", AMD_BTVER2, "btver2") -X86_CPU_TYPE_COMPAT("znver1", AMDFAM17H, "amdfam17h") -X86_CPU_TYPE_COMPAT("knm", INTEL_KNM, "knm") -X86_CPU_TYPE_COMPAT("goldmont", INTEL_GOLDMONT, "goldmont") -X86_CPU_TYPE_COMPAT("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus") -X86_CPU_TYPE_COMPAT("tremont", INTEL_TREMONT, "tremont") -// Entries below this are not in libgcc/compiler-rt. 
-X86_CPU_TYPE ("i386", INTEL_i386) -X86_CPU_TYPE ("i486", INTEL_i486) -X86_CPU_TYPE ("pentium", INTEL_PENTIUM) -X86_CPU_TYPE ("pentium-mmx", INTEL_PENTIUM_MMX) -X86_CPU_TYPE ("pentiumpro", INTEL_PENTIUM_PRO) -X86_CPU_TYPE ("pentium2", INTEL_PENTIUM_II) -X86_CPU_TYPE ("pentium3", INTEL_PENTIUM_III) -X86_CPU_TYPE ("pentium4", INTEL_PENTIUM_IV) -X86_CPU_TYPE ("pentium-m", INTEL_PENTIUM_M) -X86_CPU_TYPE ("yonah", INTEL_CORE_DUO) -X86_CPU_TYPE ("nocona", INTEL_NOCONA) -X86_CPU_TYPE ("prescott", INTEL_PRESCOTT) -X86_CPU_TYPE ("i486", AMD_i486) -X86_CPU_TYPE ("pentium", AMDPENTIUM) -X86_CPU_TYPE ("athlon", AMD_ATHLON) -X86_CPU_TYPE ("athlon-xp", AMD_ATHLON_XP) -X86_CPU_TYPE ("k8", AMD_K8) -X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3) +// This list must match what is implemented in libgcc and compilert-rt. Clang +// uses this to know how to implement __builtin_cpu_is. +X86_CPU_TYPE(INTEL_BONNELL, "bonnell") +X86_CPU_TYPE(INTEL_CORE2, "core2") +X86_CPU_TYPE(INTEL_COREI7, "corei7") +X86_CPU_TYPE(AMDFAM10H, "amdfam10h") +X86_CPU_TYPE(AMDFAM15H, "amdfam15h") +X86_CPU_TYPE(INTEL_SILVERMONT, "silvermont") +X86_CPU_TYPE(INTEL_KNL, "knl") +X86_CPU_TYPE(AMD_BTVER1, "btver1") +X86_CPU_TYPE(AMD_BTVER2, "btver2") +X86_CPU_TYPE(AMDFAM17H, "amdfam17h") +X86_CPU_TYPE(INTEL_KNM, "knm") +X86_CPU_TYPE(INTEL_GOLDMONT, "goldmont") +X86_CPU_TYPE(INTEL_GOLDMONT_PLUS, "goldmont-plus") +X86_CPU_TYPE(INTEL_TREMONT, "tremont") // Alternate names supported by __builtin_cpu_is and target multiversioning. 
-X86_CPU_TYPE_COMPAT_ALIAS(INTEL_BONNELL, "atom") -X86_CPU_TYPE_COMPAT_ALIAS(AMDFAM10H, "amdfam10") -X86_CPU_TYPE_COMPAT_ALIAS(AMDFAM15H, "amdfam15") -X86_CPU_TYPE_COMPAT_ALIAS(INTEL_SILVERMONT, "slm") +X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom") +X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10") +X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15") +X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm") -#undef X86_CPU_TYPE_COMPAT_ALIAS -#undef X86_CPU_TYPE_COMPAT +#undef X86_CPU_TYPE_ALIAS #undef X86_CPU_TYPE // This macro is used for cpu subtypes present in compiler-rt/libgcc. -#ifndef X86_CPU_SUBTYPE_COMPAT -#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) X86_CPU_SUBTYPE(ARCHNAME, ENUM) -#endif - #ifndef X86_CPU_SUBTYPE -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) +#define X86_CPU_SUBTYPE(ENUM, STR) #endif -// The first part of this list must match what is implemented in libgcc and -// compilert-rt. Clang uses this to know how to implement __builtin_cpu_is. -X86_CPU_SUBTYPE_COMPAT("nehalem", INTEL_COREI7_NEHALEM, "nehalem") -X86_CPU_SUBTYPE_COMPAT("westmere", INTEL_COREI7_WESTMERE, "westmere") -X86_CPU_SUBTYPE_COMPAT("sandybridge", INTEL_COREI7_SANDYBRIDGE, "sandybridge") -X86_CPU_SUBTYPE_COMPAT("amdfam10", AMDFAM10H_BARCELONA, "barcelona") -X86_CPU_SUBTYPE_COMPAT("amdfam10", AMDFAM10H_SHANGHAI, "shanghai") -X86_CPU_SUBTYPE_COMPAT("amdfam10", AMDFAM10H_ISTANBUL, "istanbul") -X86_CPU_SUBTYPE_COMPAT("bdver1", AMDFAM15H_BDVER1, "bdver1") -X86_CPU_SUBTYPE_COMPAT("bdver2", AMDFAM15H_BDVER2, "bdver2") -X86_CPU_SUBTYPE_COMPAT("bdver3", AMDFAM15H_BDVER3, "bdver3") -X86_CPU_SUBTYPE_COMPAT("bdver4", AMDFAM15H_BDVER4, "bdver4") -X86_CPU_SUBTYPE_COMPAT("znver1", AMDFAM17H_ZNVER1, "znver1") -X86_CPU_SUBTYPE_COMPAT("ivybridge", INTEL_COREI7_IVYBRIDGE, "ivybridge") -X86_CPU_SUBTYPE_COMPAT("haswell", INTEL_COREI7_HASWELL, "haswell") -X86_CPU_SUBTYPE_COMPAT("broadwell", INTEL_COREI7_BROADWELL, "broadwell") -X86_CPU_SUBTYPE_COMPAT("skylake", INTEL_COREI7_SKYLAKE, "skylake") 
-X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512") -X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake") -X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client") -X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server") -X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2") -X86_CPU_SUBTYPE_COMPAT("cascadelake", INTEL_COREI7_CASCADELAKE, "cascadelake") -X86_CPU_SUBTYPE_COMPAT("tigerlake", INTEL_COREI7_TIGERLAKE, "tigerlake") -X86_CPU_SUBTYPE_COMPAT("cooperlake", INTEL_COREI7_COOPERLAKE, "cooperlake") -// Entries below this are not in libgcc/compiler-rt. -X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65) -X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45) -X86_CPU_SUBTYPE ("k6", AMDPENTIUM_K6) -X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62) -X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63) -X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE) -#undef X86_CPU_SUBTYPE_COMPAT +// This list must match what is implemented in libgcc and compilert-rt. Clang +// uses this to know how to implement __builtin_cpu_is. 
+X86_CPU_SUBTYPE(INTEL_COREI7_NEHALEM, "nehalem") +X86_CPU_SUBTYPE(INTEL_COREI7_WESTMERE, "westmere") +X86_CPU_SUBTYPE(INTEL_COREI7_SANDYBRIDGE, "sandybridge") +X86_CPU_SUBTYPE(AMDFAM10H_BARCELONA, "barcelona") +X86_CPU_SUBTYPE(AMDFAM10H_SHANGHAI, "shanghai") +X86_CPU_SUBTYPE(AMDFAM10H_ISTANBUL, "istanbul") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER1, "bdver1") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER2, "bdver2") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER3, "bdver3") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER4, "bdver4") +X86_CPU_SUBTYPE(AMDFAM17H_ZNVER1, "znver1") +X86_CPU_SUBTYPE(INTEL_COREI7_IVYBRIDGE, "ivybridge") +X86_CPU_SUBTYPE(INTEL_COREI7_HASWELL, "haswell") +X86_CPU_SUBTYPE(INTEL_COREI7_BROADWELL, "broadwell") +X86_CPU_SUBTYPE(INTEL_COREI7_SKYLAKE, "skylake") +X86_CPU_SUBTYPE(INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512") +X86_CPU_SUBTYPE(INTEL_COREI7_CANNONLAKE, "cannonlake") +X86_CPU_SUBTYPE(INTEL_COREI7_ICELAKE_CLIENT, "icelake-client") +X86_CPU_SUBTYPE(INTEL_COREI7_ICELAKE_SERVER, "icelake-server") +X86_CPU_SUBTYPE(AMDFAM17H_ZNVER2, "znver2") +X86_CPU_SUBTYPE(INTEL_COREI7_CASCADELAKE, "cascadelake") +X86_CPU_SUBTYPE(INTEL_COREI7_TIGERLAKE, "tigerlake") +X86_CPU_SUBTYPE(INTEL_COREI7_COOPERLAKE, "cooperlake") #undef X86_CPU_SUBTYPE diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index 4a4fb8ccc4ccf..66c474b5c2750 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -34,7 +34,7 @@ enum ProcessorVendors : unsigned { // as a proxy for what's in libgcc/compiler-rt. enum ProcessorTypes : unsigned { CPU_TYPE_DUMMY, -#define X86_CPU_TYPE(ARCHNAME, ENUM) \ +#define X86_CPU_TYPE(ENUM, STRING) \ ENUM, #include "llvm/Support/X86TargetParser.def" CPU_TYPE_MAX @@ -44,7 +44,7 @@ enum ProcessorTypes : unsigned { // as a proxy for what's in libgcc/compiler-rt. 
enum ProcessorSubtypes : unsigned { CPU_SUBTYPE_DUMMY, -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ +#define X86_CPU_SUBTYPE(ENUM, STRING) \ ENUM, #include "llvm/Support/X86TargetParser.def" CPU_SUBTYPE_MAX diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index eeb2761faeb9f..1dd3e374b5245 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -125,6 +125,12 @@ def extending_loads : GICombineRule< (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>; def combines_for_extload: GICombineGroup<[extending_loads]>; +def sext_already_extended : GICombineRule< + (defs root:$d), + (match (wip_match_opcode G_SEXT_INREG):$d, + [{ return Helper.matchSextAlreadyExtended(*${d}); }]), + (apply [{ Helper.applySextAlreadyExtended(*${d}); }])>; + def combine_indexed_load_store : GICombineRule< (defs root:$root, indexed_load_store_matchdata:$matchinfo), (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root, diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index b8f03bcec16b8..150834e65b2dc 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -67,6 +67,10 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index aab5376db4535..16a817980f7c6 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1157,7 +1157,7 @@ def PATCHPOINT : StandardPseudoInstruction { let usesCustomInserter = 1; } def STATEPOINT : StandardPseudoInstruction { - let OutOperandList = (outs); + let OutOperandList = 
(outs variable_ops); let InOperandList = (ins variable_ops); let usesCustomInserter = 1; let mayLoad = 1; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index c6261845b765a..bed180e6717a2 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -891,6 +891,13 @@ struct Attributor { // No matching attribute found, create one. // Use the static create method. auto &AA = AAType::createForPosition(IRP, *this); + + // If we are currenty seeding attributes, enforce seeding rules. + if (SeedingPeriod && !shouldSeedAttribute(AA)) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + registerAA(AA); // For now we ignore naked and optnone functions. @@ -918,8 +925,15 @@ struct Attributor { return AA; } + // Allow seeded attributes to declare dependencies. + // Remember the seeding state. + bool OldSeedingPeriod = SeedingPeriod; + SeedingPeriod = false; + updateAA(AA); + SeedingPeriod = OldSeedingPeriod; + if (TrackDependence && AA.getState().isValidState()) recordDependence(AA, const_cast(*QueryingAA), DepClass); @@ -1345,6 +1359,10 @@ struct Attributor { ChangeStatus rewriteFunctionSignatures(SmallPtrSetImpl &ModifiedFns); + /// Check if the Attribute \p AA should be seeded. + /// See getOrCreateAAFor. + bool shouldSeedAttribute(AbstractAttribute &AA); + /// The set of all abstract attributes. ///{ using AAVector = SmallVector; @@ -1410,6 +1428,10 @@ struct Attributor { /// Invoke instructions with at least a single dead successor block. SmallVector InvokeWithDeadSuccessor; + /// Wheather attributes are being `seeded`, always false after ::run function + /// gets called \see getOrCreateAAFor. + bool SeedingPeriod = true; + /// Functions, blocks, and instructions we delete after manifest is done. 
/// ///{ @@ -2025,6 +2047,9 @@ struct AbstractAttribute : public IRPosition { /// This function should return the name of the AbstractAttribute virtual const std::string getName() const = 0; + + /// This function should return the address of the ID of the AbstractAttribute + virtual const char *getIdAddr() const = 0; ///} /// Allow the Attributor access to the protected methods. @@ -2142,6 +2167,15 @@ struct AAReturnedValues /// See AbstractAttribute::getName() const std::string getName() const override { return "AAReturnedValues"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAReturnedValues + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2163,6 +2197,14 @@ struct AANoUnwind /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoUnwind"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoUnwind + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2184,6 +2226,14 @@ struct AANoSync /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoSync"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoSync + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2206,6 +2256,14 @@ struct AANonNull /// See AbstractAttribute::getName() const std::string getName() const override { return "AANonNull"; } 
+ /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANonNull + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2228,6 +2286,14 @@ struct AANoRecurse /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoRecurse"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoRecurse + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2250,6 +2316,14 @@ struct AAWillReturn /// See AbstractAttribute::getName() const std::string getName() const override { return "AAWillReturn"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AAWillReturn + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2279,6 +2353,15 @@ struct AAUndefinedBehavior /// See AbstractAttribute::getName() const std::string getName() const override { return "AAUndefinedBehavior"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAUndefineBehavior + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2310,6 +2393,15 @@ struct AAReachability : public StateWrapper { /// See AbstractAttribute::getName() const std::string getName() const override { return 
"AAReachability"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAReachability + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2332,6 +2424,14 @@ struct AANoAlias /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoAlias"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoAlias + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2354,6 +2454,14 @@ struct AANoFree /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoFree"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoFree + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2376,6 +2484,14 @@ struct AANoReturn /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoReturn"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoReturn + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2433,6 +2549,14 @@ struct AAIsDead : public StateWrapper { /// See AbstractAttribute::getName() const std::string getName() const override { return "AAIsDead"; } + /// See 
AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AAIsDead + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; @@ -2626,6 +2750,15 @@ struct AADereferenceable /// See AbstractAttribute::getName() const std::string getName() const override { return "AADereferenceable"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AADereferenceable + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2647,6 +2780,14 @@ struct AAAlign : public IRAttribute< /// See AbstractAttribute::getName() const std::string getName() const override { return "AAAlign"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AAAlign + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Create an abstract attribute view for the position \p IRP. 
static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A); @@ -2704,6 +2845,14 @@ struct AANoCapture /// See AbstractAttribute::getName() const std::string getName() const override { return "AANoCapture"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoCapture + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2725,6 +2874,15 @@ struct AAValueSimplify : public StateWrapper { /// See AbstractAttribute::getName() const std::string getName() const override { return "AAValueSimplify"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAValueSimplify + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2745,6 +2903,14 @@ struct AAHeapToStack : public StateWrapper { /// See AbstractAttribute::getName() const std::string getName() const override { return "AAHeapToStack"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AAHeapToStack + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2781,6 +2947,15 @@ struct AAPrivatizablePtr /// See AbstractAttribute::getName() const std::string getName() const override { return "AAPrivatizablePtr"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAPrivatizablePtr + static bool classof(const 
AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2835,6 +3010,15 @@ struct AAMemoryBehavior /// See AbstractAttribute::getName() const std::string getName() const override { return "AAMemoryBehavior"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAMemoryBehavior + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -2996,6 +3180,15 @@ struct AAMemoryLocation /// See AbstractAttribute::getName() const std::string getName() const override { return "AAMemoryLocation"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAMemoryLocation + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; @@ -3044,6 +3237,15 @@ struct AAValueConstantRange /// See AbstractAttribute::getName() const std::string getName() const override { return "AAValueConstantRange"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAValueConstantRange + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + /// Unique ID (due to the unique address) static const char ID; }; diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h index 0bd81ea8f5431..d96187b73f9bb 100644 --- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -17,6 +17,9 @@ namespace llvm { namespace omp { +/// Summary of a kernel 
(=entry point for target offloading). +using Kernel = Function *; + /// Helper to remember if the module contains OpenMP (runtime calls), to be used /// foremost with containsOpenMP. struct OpenMPInModule { @@ -30,8 +33,17 @@ struct OpenMPInModule { bool isKnown() { return Value != OpenMP::UNKNOWN; } operator bool() { return Value != OpenMP::NOT_FOUND; } + /// Return the known kernels (=GPU entry points) in the module. + SmallPtrSetImpl &getKernels() { return Kernels; } + + /// Identify kernels in the module and populate the Kernels set. + void identifyKernels(Module &M); + private: enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN; + + /// Collection of known kernels (=GPU entry points) in the module. + SmallPtrSet Kernels; }; /// Helper to determine if \p M contains OpenMP (runtime calls). diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 263d3b629589c..a7052f7b6a2b1 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -68,6 +68,8 @@ class InstrProfiling : public PassInfoMixin { // vector of counter load/store pairs to be register promoted. std::vector PromotionCandidates; + // FIXME: These are to be removed after switching to the new memop value + // profiling. // The start value of precise value profile range for memory intrinsic sizes. int64_t MemOPSizeRangeStart; // The end value of precise value profile range for memory intrinsic sizes. 
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index a1aacec769794..7f5583570a44f 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -14,6 +14,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_H #define LLVM_TRANSFORMS_SCALAR_H +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include namespace llvm { @@ -256,8 +257,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1); // simplify terminator instructions, convert switches to lookup tables, etc. // FunctionPass *createCFGSimplificationPass( - unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false, + SimplifyCFGOptions Options = SimplifyCFGOptions(), std::function Ftor = nullptr); //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 9b2f0fcab95be..aff80ef1dcfab 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -366,6 +366,8 @@ class FunctionToLoopPassAdaptor return PA; } + static bool isRequired() { return true; } + private: LoopPassT Pass; diff --git a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h index f9792d38bbe6b..978562186ebae 100644 --- a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -14,9 +14,9 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H #define LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" namespace llvm { @@ -34,13 +34,7 @@ class SimplifyCFGPass : public PassInfoMixin { /// rather than optimal IR. 
That is, by default we bypass transformations that /// are likely to improve performance but make analysis for other passes more /// difficult. - SimplifyCFGPass() - : SimplifyCFGPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(false) - .convertSwitchToLookupTable(false) - .needCanonicalLoops(true) - .sinkCommonInsts(false)) {} - + SimplifyCFGPass() {} /// Construct a pass with optional optimizations. SimplifyCFGPass(const SimplifyCFGOptions &PassOptions); diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index f55e336f1f6aa..5cc8d1fa74376 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -30,6 +30,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include #include @@ -58,73 +59,6 @@ class StoreInst; class TargetLibraryInfo; class TargetTransformInfo; -/// A set of parameters used to control the transforms in the SimplifyCFG pass. -/// Options may change depending on the position in the optimization pipeline. -/// For example, canonical form that includes switches and branches may later be -/// replaced by lookup tables and selects. 
-struct SimplifyCFGOptions { - int BonusInstThreshold; - bool ForwardSwitchCondToPhi; - bool ConvertSwitchToLookupTable; - bool NeedCanonicalLoop; - bool SinkCommonInsts; - bool SimplifyCondBranch; - bool FoldTwoEntryPHINode; - - AssumptionCache *AC; - - SimplifyCFGOptions(unsigned BonusThreshold = 1, - bool ForwardSwitchCond = false, - bool SwitchToLookup = false, bool CanonicalLoops = true, - bool SinkCommon = false, - AssumptionCache *AssumpCache = nullptr, - bool SimplifyCondBranch = true, - bool FoldTwoEntryPHINode = true) - : BonusInstThreshold(BonusThreshold), - ForwardSwitchCondToPhi(ForwardSwitchCond), - ConvertSwitchToLookupTable(SwitchToLookup), - NeedCanonicalLoop(CanonicalLoops), - SinkCommonInsts(SinkCommon), - SimplifyCondBranch(SimplifyCondBranch), - FoldTwoEntryPHINode(FoldTwoEntryPHINode), - AC(AssumpCache) {} - - // Support 'builder' pattern to set members by name at construction time. - SimplifyCFGOptions &bonusInstThreshold(int I) { - BonusInstThreshold = I; - return *this; - } - SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) { - ForwardSwitchCondToPhi = B; - return *this; - } - SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { - ConvertSwitchToLookupTable = B; - return *this; - } - SimplifyCFGOptions &needCanonicalLoops(bool B) { - NeedCanonicalLoop = B; - return *this; - } - SimplifyCFGOptions &sinkCommonInsts(bool B) { - SinkCommonInsts = B; - return *this; - } - SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { - AC = Cache; - return *this; - } - SimplifyCFGOptions &setSimplifyCondBranch(bool B) { - SimplifyCondBranch = B; - return *this; - } - - SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) { - FoldTwoEntryPHINode = B; - return *this; - } -}; - //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -160,7 +94,9 @@ bool wouldInstructionBeTriviallyDead(Instruction *I, /// recursively. Return true if any instructions were deleted. 
bool RecursivelyDeleteTriviallyDeadInstructions( Value *V, const TargetLibraryInfo *TLI = nullptr, - MemorySSAUpdater *MSSAU = nullptr); + MemorySSAUpdater *MSSAU = nullptr, + std::function AboutToDeleteCallback = + std::function()); /// Delete all of the instructions in `DeadInsts`, and all other instructions /// that deleting these in turn causes to be trivially dead. @@ -172,7 +108,9 @@ bool RecursivelyDeleteTriviallyDeadInstructions( /// empty afterward. void RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl &DeadInsts, - const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr); + const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr, + std::function AboutToDeleteCallback = + std::function()); /// Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow /// instructions that are not trivially dead. These will be ignored. @@ -180,7 +118,9 @@ void RecursivelyDeleteTriviallyDeadInstructions( /// were found and deleted. bool RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl &DeadInsts, - const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr); + const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr, + std::function AboutToDeleteCallback = + std::function()); /// If the specified value is an effectively dead PHI node, due to being a /// def-use chain of single-use nodes that either forms a cycle or is terminated diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index 657b97c67a8b3..c922476ac79da 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -70,6 +70,13 @@ class raw_ostream; enum PredicateType { PT_Branch, PT_Assume, PT_Switch }; +/// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op +/// is the value the constraint applies to (the ssa.copy result). 
+struct PredicateConstraint { + CmpInst::Predicate Predicate; + Value *OtherOp; +}; + // Base class for all predicate information we provide. // All of our predicate information has at least a comparison. class PredicateBase : public ilist_node { @@ -83,37 +90,34 @@ class PredicateBase : public ilist_node { // predicates, this is different to OriginalOp which refers to the initial // operand. Value *RenamedOp; + // The condition associated with this predicate. + Value *Condition; + PredicateBase(const PredicateBase &) = delete; PredicateBase &operator=(const PredicateBase &) = delete; PredicateBase() = delete; virtual ~PredicateBase() = default; - -protected: - PredicateBase(PredicateType PT, Value *Op) : Type(PT), OriginalOp(Op) {} -}; - -class PredicateWithCondition : public PredicateBase { -public: - Value *Condition; static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume || PB->Type == PT_Branch || PB->Type == PT_Switch; } + /// Fetch condition in the form of PredicateConstraint, if possible. + Optional getConstraint() const; + protected: - PredicateWithCondition(PredicateType PT, Value *Op, Value *Condition) - : PredicateBase(PT, Op), Condition(Condition) {} + PredicateBase(PredicateType PT, Value *Op, Value *Condition) + : Type(PT), OriginalOp(Op), Condition(Condition) {} }; // Provides predicate information for assumes. Since assumes are always true, // we simply provide the assume instruction, so you can tell your relative // position to it. 
-class PredicateAssume : public PredicateWithCondition { +class PredicateAssume : public PredicateBase { public: IntrinsicInst *AssumeInst; PredicateAssume(Value *Op, IntrinsicInst *AssumeInst, Value *Condition) - : PredicateWithCondition(PT_Assume, Op, Condition), - AssumeInst(AssumeInst) {} + : PredicateBase(PT_Assume, Op, Condition), AssumeInst(AssumeInst) {} PredicateAssume() = delete; static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume; @@ -123,7 +127,7 @@ class PredicateAssume : public PredicateWithCondition { // Mixin class for edge predicates. The FROM block is the block where the // predicate originates, and the TO block is the block where the predicate is // valid. -class PredicateWithEdge : public PredicateWithCondition { +class PredicateWithEdge : public PredicateBase { public: BasicBlock *From; BasicBlock *To; @@ -135,7 +139,7 @@ class PredicateWithEdge : public PredicateWithCondition { protected: PredicateWithEdge(PredicateType PType, Value *Op, BasicBlock *From, BasicBlock *To, Value *Cond) - : PredicateWithCondition(PType, Op, Cond), From(From), To(To) {} + : PredicateBase(PType, Op, Cond), From(From), To(To) {} }; // Provides predicate information for branches. diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h new file mode 100644 index 0000000000000..ca9a7e7223dbe --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -0,0 +1,72 @@ +//===- SimplifyCFGOptions.h - Control structure for SimplifyCFG -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A set of parameters used to control the transforms in the SimplifyCFG pass. 
+// Options may change depending on the position in the optimization pipeline. +// For example, canonical form that includes switches and branches may later be +// replaced by lookup tables and selects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H +#define LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H + +namespace llvm { + +class AssumptionCache; + +struct SimplifyCFGOptions { + int BonusInstThreshold = 1; + bool ForwardSwitchCondToPhi = false; + bool ConvertSwitchToLookupTable = false; + bool NeedCanonicalLoop = true; + bool SinkCommonInsts = false; + bool SimplifyCondBranch = true; + bool FoldTwoEntryPHINode = true; + + AssumptionCache *AC = nullptr; + + // Support 'builder' pattern to set members by name at construction time. + SimplifyCFGOptions &bonusInstThreshold(int I) { + BonusInstThreshold = I; + return *this; + } + SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) { + ForwardSwitchCondToPhi = B; + return *this; + } + SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { + ConvertSwitchToLookupTable = B; + return *this; + } + SimplifyCFGOptions &needCanonicalLoops(bool B) { + NeedCanonicalLoop = B; + return *this; + } + SimplifyCFGOptions &sinkCommonInsts(bool B) { + SinkCommonInsts = B; + return *this; + } + SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { + AC = Cache; + return *this; + } + SimplifyCFGOptions &setSimplifyCondBranch(bool B) { + SimplifyCondBranch = B; + return *this; + } + + SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) { + FoldTwoEntryPHINode = B; + return *this; + } +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index b262311a96a07..778a17c8aeee5 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -388,7 +388,7 @@ module LLVM_Utils { umbrella "Support" module * { 
export * } - + // Exclude this; it should only be used on Windows. exclude header "Support/Windows/WindowsSupport.h" @@ -397,8 +397,9 @@ module LLVM_Utils { exclude header "Support/Solaris/sys/regset.h" // These are intended for textual inclusion. - textual header "Support/ARMTargetParser.def" textual header "Support/AArch64TargetParser.def" + textual header "Support/ARMTargetParser.def" + textual header "Support/RISCVTargetParser.def" textual header "Support/TargetOpcodes.def" textual header "Support/X86TargetParser.def" } diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp index e9da1e607b45b..972d0d3ea7f2b 100644 --- a/llvm/lib/Analysis/AssumeBundleQueries.cpp +++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp @@ -6,17 +6,29 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "assume-queries" + #include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/DebugCounter.h" using namespace llvm; using namespace llvm::PatternMatch; +STATISTIC(NumAssumeQueries, "Number of Queries into an assume assume bundles"); +STATISTIC( + NumUsefullAssumeQueries, + "Number of Queries into an assume assume bundles that were satisfied"); + +DEBUG_COUNTER(AssumeQueryCounter, "assume-queries-counter", + "Controls which assumes gets created"); + static bool bundleHasArgument(const CallBase::BundleOpInfo &BOI, unsigned Idx) { return BOI.End - BOI.Begin > Idx; } @@ -151,6 +163,9 @@ llvm::getKnowledgeForValue(const Value *V, function_ref Filter) { + NumAssumeQueries++; + if (!DebugCounter::shouldExecute(AssumeQueryCounter)) + return RetainedKnowledge::none(); if (AC) { for (AssumptionCache::ResultElem &Elem : AC->assumptionsFor(V)) { 
IntrinsicInst *II = cast_or_null(Elem.Assume); @@ -159,20 +174,24 @@ llvm::getKnowledgeForValue(const Value *V, if (RetainedKnowledge RK = getKnowledgeFromBundle( *II, II->bundle_op_info_begin()[Elem.Index])) if (is_contained(AttrKinds, RK.AttrKind) && - Filter(RK, II, &II->bundle_op_info_begin()[Elem.Index])) + Filter(RK, II, &II->bundle_op_info_begin()[Elem.Index])) { + NumUsefullAssumeQueries++; return RK; + } } return RetainedKnowledge::none(); } - for (auto &U : V->uses()) { + for (const auto &U : V->uses()) { CallInst::BundleOpInfo* Bundle = getBundleFromUse(&U); if (!Bundle) continue; if (RetainedKnowledge RK = getKnowledgeFromBundle(*cast(U.getUser()), *Bundle)) if (is_contained(AttrKinds, RK.AttrKind) && - Filter(RK, cast(U.getUser()), Bundle)) + Filter(RK, cast(U.getUser()), Bundle)) { + NumUsefullAssumeQueries++; return RK; + } } return RetainedKnowledge::none(); } diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 74664098ce1d4..33f122728d2aa 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1648,8 +1648,32 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, } SmallVector V1Srcs; + // For a recursive phi, that recurses through a constant gep, we can perform + // aliasing calculations using the other phi operands with an unknown size to + // specify that an unknown number of elements after the initial value are + // potentially accessed. bool isRecursive = false; - if (PV) { + auto CheckForRecPhi = [&](Value *PV) { + if (!EnableRecPhiAnalysis) + return false; + if (GEPOperator *PVGEP = dyn_cast(PV)) { + // Check whether the incoming value is a GEP that advances the pointer + // result of this PHI node (e.g. in a loop). If this is the case, we + // would recurse and always get a MayAlias. Handle this case specially + // below.
We need to ensure that the phi is inbounds and has a constant + // positive operand so that we can check for alias with the initial value + // and an unknown but positive size. + if (PVGEP->getPointerOperand() == PN && PVGEP->isInBounds() && + PVGEP->getNumIndices() == 1 && isa(PVGEP->idx_begin()) && + !cast(PVGEP->idx_begin())->isNegative()) { + isRecursive = true; + return true; + } + } + return false; + }; + + if (PV) { // If we have PhiValues then use it to get the underlying phi values. const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN); // If we have more phi values than the search depth then return MayAlias @@ -1660,19 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, return MayAlias; // Add the values to V1Srcs for (Value *PV1 : PhiValueSet) { - if (EnableRecPhiAnalysis) { - if (GEPOperator *PV1GEP = dyn_cast(PV1)) { - // Check whether the incoming value is a GEP that advances the pointer - // result of this PHI node (e.g. in a loop). If this is the case, we - // would recurse and always get a MayAlias. Handle this case specially - // below. - if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && - isa(PV1GEP->idx_begin())) { - isRecursive = true; - continue; - } - } - } + if (CheckForRecPhi(PV1)) + continue; V1Srcs.push_back(PV1); } } else { @@ -1687,18 +1700,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, // and 'n' are the number of PHI sources. return MayAlias; - if (EnableRecPhiAnalysis) - if (GEPOperator *PV1GEP = dyn_cast(PV1)) { - // Check whether the incoming value is a GEP that advances the pointer - // result of this PHI node (e.g. in a loop). If this is the case, we - // would recurse and always get a MayAlias. Handle this case specially - // below. 
- if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && - isa(PV1GEP->idx_begin())) { - isRecursive = true; - continue; - } - } + if (CheckForRecPhi(PV1)) + continue; if (UniqueSrc.insert(PV1).second) V1Srcs.push_back(PV1); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index a317579ecc836..703623396d96a 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -1,17 +1,35 @@ set(CommonMLSources MLInlineAdvisor.cpp) set(ReleaseModeMLSources ReleaseModeModelRunner.cpp) +set(DevelopmentModeMLSources TFUtils.cpp) -if (DEFINED LLVM_HAVE_TF_AOT) - include(TensorFlowCompile) - tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) - list(APPEND ReleaseModeMLSources - $ - ${GENERATED_OBJS} - ) - set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources}) +if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) + set(MLPolicySources ${CommonMLSources}) + if (DEFINED LLVM_HAVE_TF_AOT) + include(TensorFlowCompile) + tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) + list(APPEND ReleaseModeMLSources + $ + ${GENERATED_OBJS} + ) + LIST(APPEND MLPolicySources ${ReleaseModeMLSources}) + else() + LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources}) + endif() + + if (DEFINED LLVM_HAVE_TF_API) + LIST(APPEND MLPolicySources ${DevelopmentModeMLSources}) + LIST(APPEND MLLinkDeps ${tensorflow_c_api}) + else() + LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources}) + endif() else() - set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources}) + LIST(APPEND LLVM_OPTIONAL_SOURCES + ${CommonMLSources} + ${DevelopmentModeMLSources} + ${ReleaseModeMLSources} + ) endif() + add_llvm_component_library(LLVMAnalysis AliasAnalysis.cpp @@ -57,6 +75,7 @@ add_llvm_component_library(LLVMAnalysis InlineCost.cpp InlineAdvisor.cpp InlineFeaturesAnalysis.cpp + InlineSizeEstimatorAnalysis.cpp InstCount.cpp InstructionPrecedenceTracking.cpp 
InstructionSimplify.cpp @@ -124,4 +143,7 @@ add_llvm_component_library(LLVMAnalysis DEPENDS intrinsics_gen + + LINK_LIBS + ${MLLinkDeps} ) diff --git a/llvm/lib/Analysis/CallGraph.cpp b/llvm/lib/Analysis/CallGraph.cpp index d8abccfdb0958..55adb454b7338 100644 --- a/llvm/lib/Analysis/CallGraph.cpp +++ b/llvm/lib/Analysis/CallGraph.cpp @@ -77,9 +77,10 @@ bool CallGraph::invalidate(Module &, const PreservedAnalyses &PA, void CallGraph::addToCallGraph(Function *F) { CallGraphNode *Node = getOrInsertFunction(F); - // If this function has external linkage or has its address taken, anything - // could call it. - if (!F->hasLocalLinkage() || F->hasAddressTaken()) + // If this function has external linkage or has its address taken and + // it is not a callback, then anything could call it. + if (!F->hasLocalLinkage() || + F->hasAddressTaken(nullptr, /*IgnoreCallbackUses=*/true)) ExternalCallingNode->addCalledFunction(nullptr, Node); populateCallGraphNode(Node); diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp index fc65936024af6..91f8029cc326b 100644 --- a/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Intrinsics.h" @@ -225,11 +226,35 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // invalidated and removed. unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; + CallGraphNode::iterator CGNEnd = CGN->end(); + + auto RemoveAndCheckForDone = [&](CallGraphNode::iterator I) { + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. 
+ bool WasLast = I + 1 == CGNEnd; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + return true; + + CGNEnd = CGN->end(); + return false; + }; + // Get the set of call sites currently in the function. - for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { - // Skip "reference" call records that do not have call instruction. + for (CallGraphNode::iterator I = CGN->begin(); I != CGNEnd;) { + // Delete "reference" call records that do not have call instruction. We + // reinsert them as needed later. However, keep them in checking mode. if (!I->first) { - ++I; + if (CheckingMode) { + ++I; + continue; + } + if (RemoveAndCheckForDone(I)) + break; continue; } @@ -258,17 +283,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, else ++NumDirectRemoved; - // Just remove the edge from the set of callees, keep track of whether - // I points to the last element of the vector. - bool WasLast = I + 1 == E; - CGN->removeCallEdge(I); - - // If I pointed to the last element of the vector, we have to bail out: - // iterator checking rejects comparisons of the resultant pointer with - // end. - if (WasLast) + if (RemoveAndCheckForDone(I)) break; - E = CGN->end(); continue; } @@ -296,6 +312,15 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (Callee && Callee->isIntrinsic()) continue; + // If we are not in checking mode, insert potential callback calls as + // references. This is not a requirement but helps to iterate over the + // functions in the right order. + if (!CheckingMode) { + forEachCallbackFunction(*Call, [&](Function *CB) { + CGN->addCalledFunction(nullptr, CG.getOrInsertFunction(CB)); + }); + } + // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from Calls. 
DenseMap::iterator ExistingIt = diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index a414336fb21be..6feffcbb98e1f 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -333,10 +333,29 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, const DataLayout &DL) { do { Type *SrcTy = C->getType(); + uint64_t DestSize = DL.getTypeSizeInBits(DestTy); + uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy); + if (SrcSize < DestSize) + return nullptr; + + // Catch the obvious splat cases (since all-zeros can coerce non-integral + // pointers legally). + if (C->isNullValue() && !DestTy->isX86_MMXTy()) + return Constant::getNullValue(DestTy); + if (C->isAllOnesValue() && + (DestTy->isIntegerTy() || DestTy->isFloatingPointTy() || + DestTy->isVectorTy()) && + !DestTy->isX86_MMXTy() && !DestTy->isPtrOrPtrVectorTy()) + // Get ones when the input is trivial, but + // only for supported types inside getAllOnesValue. + return Constant::getAllOnesValue(DestTy); // If the type sizes are the same and a cast is legal, just directly // cast the constant. - if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) { + // But be careful not to coerce non-integral pointers illegally. + if (SrcSize == DestSize && + DL.isNonIntegralPointerType(SrcTy->getScalarType()) == + DL.isNonIntegralPointerType(DestTy->getScalarType())) { Instruction::CastOps Cast = Instruction::BitCast; // If we are going from a pointer to int or vice versa, we spell the cast // differently. 
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index 9a3e5fa0df722..e18f681278d3a 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -84,7 +84,8 @@ class DefaultInlineAdvice : public InlineAdvice { } // namespace -std::unique_ptr DefaultInlineAdvisor::getAdvice(CallBase &CB) { +llvm::Optional static getDefaultInlineAdvice( + CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) { Function &Caller = *CB.getCaller(); ProfileSummaryInfo *PSI = FAM.getResult(Caller) @@ -111,10 +112,16 @@ std::unique_ptr DefaultInlineAdvisor::getAdvice(CallBase &CB) { return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI, GetBFI, PSI, RemarksEnabled ? &ORE : nullptr); }; - auto OIC = llvm::shouldInline(CB, GetInlineCost, ORE, - Params.EnableDeferral.hasValue() && - Params.EnableDeferral.getValue()); - return std::make_unique(this, CB, OIC, ORE); + return llvm::shouldInline(CB, GetInlineCost, ORE, + Params.EnableDeferral.hasValue() && + Params.EnableDeferral.getValue()); +} + +std::unique_ptr DefaultInlineAdvisor::getAdvice(CallBase &CB) { + auto OIC = getDefaultInlineAdvice(CB, FAM, Params); + return std::make_unique( + this, CB, OIC, + FAM.getResult(*CB.getCaller())); } InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB, diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp new file mode 100644 index 0000000000000..cffdbe4116086 --- /dev/null +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -0,0 +1,307 @@ +//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements feature and label extraction for offline supervised learning +// of an IR to native size model. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" + +#ifdef LLVM_HAVE_TF_API +#include "llvm/Analysis/Utils/TFUtils.h" +#endif +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace llvm; + +AnalysisKey InlineSizeEstimatorAnalysis::Key; + +#define DEBUG_TYPE "inline-size-estimator" + +#ifdef LLVM_HAVE_TF_API +cl::opt TFIR2NativeModelPath( + "ml-inliner-ir2native-model", cl::Hidden, + cl::desc("Path to saved model evaluating native size from IR.")); + +namespace { +unsigned getMaxInstructionID() { +#define LAST_OTHER_INST(NR) return NR; +#include "llvm/IR/Instruction.def" +} + +class IRToNativeSizeLearning { +public: + enum class NamedFeatureIndex : size_t { + InitialSize, + Blocks, + Calls, + IsLocal, + IsLinkOnceODR, + IsLinkOnce, + Loops, + MaxLoopDepth, + MaxDomTreeLevel, + + NumNamedFeatures + }; + static const size_t NumNamedFeatures = + static_cast(NamedFeatureIndex::NumNamedFeatures); + struct FunctionFeatures { + static std::vector> + ImportantInstructionSuccessions; + static const size_t FeatureCount; + + std::array NamedFeatures = {0}; + std::vector InstructionHistogram; + std::vector InstructionPairHistogram; + + void fillTensor(int32_t *Ptr) const; + int32_t &operator[](NamedFeatureIndex Pos) { + return NamedFeatures[static_cast(Pos)]; + }
+ }; + IRToNativeSizeLearning() = default; + + static FunctionFeatures getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM); + +private: + /// Sort once the feature tuples. + struct SortFeatureTuples { + bool IsSorted = false; + SortFeatureTuples() { + std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end()); + IsSorted = true; + } + }; + + static llvm::ManagedStatic TupleSorter; + + static bool ensureSortedTuples() { return TupleSorter->IsSorted; } +}; +llvm::ManagedStatic + IRToNativeSizeLearning::TupleSorter; + +// This is a point in time - we determined including these pairs of +// consecutive instructions (in the IR layout available at inline time) as +// features improves the model performance. We want to move away from manual +// feature selection. +// The vector is given in opcode pairs rather than labels because 1) labels +// weren't readily available, and 2) the successions were hand - extracted +std::vector> + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = + {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, + {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, + {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, + {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, + {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, + {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, + {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, + {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, + {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, + {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, + {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, + {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, + {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, + {65, 65}, 
{33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, + {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, + {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, + {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, + {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, + {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, + {1, 27}, {49, 1}, {1, 8}, {56, 2}}; + +// We have: 9 calculated features (the features here); 1 feature for each +// instruction opcode; and 1 feature for each manually-identified sequence. +// For the latter 2, we build a histogram: we count the number of +// occurrences of each instruction opcode or succession of instructions, +// respectively. +// Note that instruction opcodes start from 1. For convenience, we also have an +// always 0 feature for the '0' opcode, hence the extra 1. +const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions + .size() + + getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; + +size_t getSize(Function &F, TargetTransformInfo &TTI) { + size_t Ret = 0; + for (auto &BB : F) + for (auto &I : BB) + Ret += TTI.getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); + return Ret; +} + +size_t getSize(Function &F, FunctionAnalysisManager &FAM) { + auto &TTI = FAM.getResult(F); + return getSize(F, TTI); +} + +unsigned getMaxDominatorTreeDepth(const Function &F, + const DominatorTree &Tree) { + unsigned Ret = 0; + for (auto &BB : F) + if (auto *TN = Tree.getNode(&BB)) + Ret = std::max(Ret, TN->getLevel()); + return Ret; +} +} // namespace + +IRToNativeSizeLearning::FunctionFeatures +IRToNativeSizeLearning::getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM) { + assert(ensureSortedTuples() && "expected lazy initialization"); + + auto &DomTree = FAM.getResult(F); + FunctionFeatures FF; + size_t InstrCount = 
getMaxInstructionID() + 1; + FF.InstructionHistogram.resize(InstrCount); + + FF.InstructionPairHistogram.resize( + FunctionFeatures::ImportantInstructionSuccessions.size()); + + auto StartID = 0; + auto LastID = StartID; + auto getPairIndex = [](size_t a, size_t b) { + auto I = + std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end(), + std::make_pair(a, b)); + if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) + return -1; + return static_cast(std::distance( + FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); + }; + + // We don't want debug calls, because they'd just add noise. + for (auto &BB : F) { + for (auto I = BB.instructionsWithoutDebug().begin(), + E = BB.instructionsWithoutDebug().end(); + I != E; ++I) { + auto ID = I->getOpcode(); + + ++FF.InstructionHistogram[ID]; + int PairIndex = getPairIndex(LastID, ID); + if (PairIndex >= 0) + ++FF.InstructionPairHistogram[PairIndex]; + LastID = ID; + if (isa(*I)) + ++FF[NamedFeatureIndex::Calls]; + } + } + + FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); + FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); + FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); + FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); + FF[NamedFeatureIndex::Blocks] = + std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); + auto &LI = FAM.getResult(F); + FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); + for (auto &L : LI) + FF[NamedFeatureIndex::MaxLoopDepth] = + std::max(FF[NamedFeatureIndex::MaxLoopDepth], + static_cast(L->getLoopDepth())); + FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); + return FF; +} + +void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { + std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); + Ptr += NamedFeatures.size(); + std::copy(InstructionHistogram.begin(), 
InstructionHistogram.end(), Ptr); + Ptr += InstructionHistogram.size(); + std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), + Ptr); +} + +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { + return !TFIR2NativeModelPath.empty(); +} + +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { + if (!isEvaluatorRequested()) { + return; + } + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + Evaluator = std::make_unique( + TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); + if (!Evaluator || !Evaluator->isValid()) { + Evaluator.reset(); + return; + } + static const std::vector Dim{ + 1, static_cast( + IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; + + Evaluator->initInput(0, Dim); +} + +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + if (!Evaluator) + return None; + auto Features = IRToNativeSizeLearning::getFunctionFeatures( + const_cast(F), FAM); + int32_t *V = Evaluator->getInput(0); + Features.fillTensor(V); + auto ER = Evaluator->evaluate(); + if (!ER) + return None; + float Ret = *ER->getTensorValue(0); + if (Ret < 0.0) + Ret = 0.0; + return static_cast(Ret); +} + +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&Other) + : Evaluator(std::move(Other.Evaluator)) {} + +#else +namespace llvm { +class TFModelEvaluator {}; +} // namespace llvm +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&) {} +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + return None; +} +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return 
false; } +#endif + +PreservedAnalyses +InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() + << ": " << AM.getResult(F) << "\n"; + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 0975a65d183e4..8fbcee84a1567 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3703,6 +3703,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; } } + + // LHS == Inf + if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI)) + return getFalse(RetTy); + // LHS != Inf + if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI)) + return getTrue(RetTy); } if (C->isNegative() && !C->isNegZero()) { assert(!C->isNaN() && "Unexpected NaN constant!"); @@ -4118,15 +4125,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, if (TrueVal == FalseVal) return TrueVal; - // If the true or false value is undef, we can fold to the other value as - // long as the other value isn't poison. - // select ?, undef, X -> X - if (isa(TrueVal) && - isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.CxtI, Q.DT)) + if (isa(TrueVal)) // select ?, undef, X -> X return FalseVal; - // select ?, X, undef -> X - if (isa(FalseVal) && - isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.CxtI, Q.DT)) + if (isa(FalseVal)) // select ?, X, undef -> X return TrueVal; // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' @@ -4146,11 +4147,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, // one element is undef, choose the defined element as the safe result. 
if (TEltC == FEltC) NewC.push_back(TEltC); - else if (isa(TEltC) && - isGuaranteedNotToBeUndefOrPoison(FEltC)) + else if (isa(TEltC)) NewC.push_back(FEltC); - else if (isa(FEltC) && - isGuaranteedNotToBeUndefOrPoison(TEltC)) + else if (isa(FEltC)) NewC.push_back(TEltC); else break; diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index fb14008a2b471..f5ffa7286b3b8 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -330,11 +330,11 @@ class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree) : LVIImpl(L), DT(DTree) {} - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS); + void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) override; - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS); + void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) override; }; } namespace { diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 0b61b1c0eabd7..204f855d28b33 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -677,7 +677,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { // No interprocedural analysis is done at the moment. 
- if (!A.hasPassPointeeByValueAttr()) { + if (!A.hasPassPointeeByValueCopyAttr()) { ++ObjectVisitorArgument; return unknown(); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 48c686b732608..755a4e9685211 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -11937,6 +11937,11 @@ ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { + // For compatibility with opt's -analyze feature under legacy pass manager + // which was not ported to NPM. This keeps tests using + // update_analyze_test_checks.py working. + OS << "Printing analysis 'Scalar Evolution Analysis' for function '" + << F.getName() << "':\n"; AM.getResult(F).print(OS); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp new file mode 100644 index 0000000000000..19e6d626e2386 --- /dev/null +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -0,0 +1,289 @@ +//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for interfacing with tensorflow C APIs. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_experimental.h" + +#include + +using namespace llvm; + +namespace { + +using TFGraphPtr = std::unique_ptr; +using TFSessionOptionsPtr = + std::unique_ptr; +using TFStatusPtr = std::unique_ptr; + +struct TFInitializer { + TFInitializer() { + assert(!IsInitialized && "TFInitialized should be called only once"); + int Argc = 1; + const char *Name = ""; + const char **NamePtr = &Name; + TF_InitMain(Name, &Argc, const_cast(&NamePtr)); + IsInitialized = true; + } + bool IsInitialized = false; +}; + +llvm::ManagedStatic TFLibInitializer; + +bool ensureInitTF() { return TFLibInitializer->IsInitialized; } + +TFGraphPtr createTFGraph() { + return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); +} + +TFStatusPtr createTFStatus() { + return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); +} + +TFSessionOptionsPtr createTFSessionOptions() { + return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); +} +} // namespace + +namespace llvm { +class EvaluationResultImpl { +public: + EvaluationResultImpl(size_t OutputSize) + : OutputSize(OutputSize), Output(OutputSize){}; + + ~EvaluationResultImpl() { + for (auto *P : Output) + if (P) + TF_DeleteTensor(P); + } + + EvaluationResultImpl(const EvaluationResultImpl &) = delete; + EvaluationResultImpl(EvaluationResultImpl &&Other) = delete; + std::vector &getOutput() { return Output; } + +private: + const size_t OutputSize; + std::vector Output; +}; + +class TFModelEvaluatorImpl { +public: + TFModelEvaluatorImpl(StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags); + + bool isValid() const { return IsValid; } + size_t OutputSize() 
const { return OutputFeed.size(); } + + void evaluate(TF_Tensor **Output, TF_Status *Status) { + TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), + Input.size(), OutputFeed.data(), Output, OutputFeed.size(), + nullptr, 0, nullptr, Status); + } + + void initInput(size_t Index, TF_DataType Type, + const std::vector &Dimensions); + const std::vector &getInput() const { return Input; } + + ~TFModelEvaluatorImpl(); + +private: + /// The objects necessary for carrying out an evaluation of the SavedModel. + /// They are expensive to set up, and we maintain them accross all the + /// evaluations of the model. + TF_Session *Session = nullptr; + TFGraphPtr Graph; + TFSessionOptionsPtr Options; + + /// The specification of the input nodes. + std::vector InputFeed; + + /// The input tensors. They must match by index of the corresponding InputFeed + /// value. We set up the tensors once and just mutate theirs scalars before + /// each evaluation. The input tensors keep their value after an evaluation. + std::vector Input; + + /// The specification of the output nodes. When evaluating, the tensors in the + /// output tensor vector must match by index the corresponding element in the + /// OutputFeed. + std::vector OutputFeed; + + void invalidate() { IsValid = false; } + + bool IsValid = true; + + /// Reusable utility for ensuring we can bind the requested Name to a node in + /// the SavedModel Graph. 
+ bool checkReportAndInvalidate(const TF_Output &Output, StringRef Name); +}; +} // namespace llvm + +TFModelEvaluatorImpl::TFModelEvaluatorImpl( + StringRef SavedModelPath, const std::vector &InputNames, + const std::vector &OutputNames, const char *Tags) + : Graph(createTFGraph()), Options(createTFSessionOptions()), + InputFeed(InputNames.size()), Input(InputNames.size()), + OutputFeed(OutputNames.size()) { + if (!ensureInitTF()) { + errs() << "Tensorflow should have been initialized"; + return; + } + auto Status = createTFStatus(); + + Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, + SavedModelPath.str().c_str(), &Tags, 1, + Graph.get(), nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + invalidate(); + } + for (size_t I = 0; I < InputNames.size(); ++I) { + InputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0}; + if (!checkReportAndInvalidate(InputFeed[I], InputNames[I])) + return; + } + for (size_t I = 0; I < OutputNames.size(); ++I) { + OutputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0}; + if (!checkReportAndInvalidate(OutputFeed[I], OutputNames[I])) + return; + } +} + +TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags) + : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputNames, OutputNames, + Tags)) { + if (!Impl->isValid()) + Impl.reset(); +} + +TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { + for (auto *T : Input) { + TF_DeleteTensor(T); + } + if (Session == nullptr) + return; + auto Status = createTFStatus(); + TF_DeleteSession(Session, Status.get()); + Session = nullptr; + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) + errs() << "Could not delete TF session"; +} + +bool TFModelEvaluatorImpl::checkReportAndInvalidate(const TF_Output &Output, + StringRef Name) { + if (Output.oper) + return true; + errs() << 
"Could not find TF_Output named: " + Name; + IsValid = false; + return IsValid; +} + +Optional TFModelEvaluator::evaluate() { + if (!isValid()) + return None; + std::unique_ptr Ret = + std::make_unique(Impl->OutputSize()); + auto Status = createTFStatus(); + Impl->evaluate(Ret->getOutput().data(), Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + Impl.reset(); + return None; + } + return EvaluationResult(std::move(Ret)); +} + +void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, + const std::vector &Dimensions) { + int64_t TotalSize = TF_DataTypeSize(Type); + for (auto &D : Dimensions) + TotalSize *= D; + + Input[Index] = + TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); + std::memset(TF_TensorData(Input[Index]), 0, TotalSize); +} + +void *TFModelEvaluator::getUntypedInput(size_t Index) { + return TF_TensorData(Impl->getInput()[Index]); +} + +TFModelEvaluator::EvaluationResult::EvaluationResult( + std::unique_ptr Impl) + : Impl(std::move(Impl)) {} + +TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) + : Impl(std::move(Other.Impl)) {} + +void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { + return TF_TensorData(Impl->getOutput()[Index]); +} + +void TFModelEvaluator::initInput(size_t Index, int TypeIndex, + const std::vector &Dimensions) { + Impl->initInput(Index, static_cast(TypeIndex), Dimensions); +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_FLOAT; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_DOUBLE; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_INT8; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_UINT8; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_INT16; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_UINT16; +} + +template <> int 
TFModelEvaluator::getModelTypeIndex() { + return TF_INT32; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_UINT32; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_INT64; +} + +template <> int TFModelEvaluator::getModelTypeIndex() { + return TF_UINT64; +} + +TFModelEvaluator::EvaluationResult::~EvaluationResult() {} +TFModelEvaluator::~TFModelEvaluator() {} diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 60cfb04634c4a..0b465d3c31773 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1228,6 +1228,15 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_ZdaPvmSt11align_val_t: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy()); + // void __atomic_load(size_t, void *, void *, int) + case LibFunc_atomic_load: + // void __atomic_store(size_t, void *, void *, int) + case LibFunc_atomic_store: + return (NumParams == 4 && FTy.getParamType(0)->isIntegerTy() && + FTy.getParamType(1)->isPointerTy() && + FTy.getParamType(2)->isPointerTy() && + FTy.getParamType(3)->isIntegerTy()); + case LibFunc_memset_pattern16: return (!FTy.isVarArg() && NumParams == 3 && FTy.getParamType(0)->isPointerTy() && diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 614c8bb2f1e64..380022c10acec 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2353,15 +2353,20 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, return false; // Check for pointer simplifications. - if (V->getType()->isPointerTy()) { + + if (PointerType *PtrTy = dyn_cast(V->getType())) { // Alloca never returns null, malloc might. if (isa(V) && Q.DL.getAllocaAddrSpace() == 0) return true; - // A byval, inalloca, or nonnull argument is never null. 
- if (const Argument *A = dyn_cast(V)) - if (A->hasPassPointeeByValueAttr() || A->hasNonNullAttr()) + // A byval, inalloca may not be null in a non-default addres space. A + // nonnull argument is assumed never 0. + if (const Argument *A = dyn_cast(V)) { + if (((A->hasPassPointeeByValueCopyAttr() && + !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || + A->hasNonNullAttr())) return true; + } // A Load tagged with nonnull metadata is never null. if (const LoadInst *LI = dyn_cast(V)) @@ -3133,21 +3138,14 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, if (F->isIntrinsic()) return F->getIntrinsicID(); - if (!TLI) - return Intrinsic::not_intrinsic; - + // We are going to infer semantics of a library function based on mapping it + // to an LLVM intrinsic. Check that the library function is available from + // this callbase and in this environment. LibFunc Func; - // We're going to make assumptions on the semantics of the functions, check - // that the target knows that it's available in this environment and it does - // not have local linkage. - if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func)) + if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || + !CB.onlyReadsMemory()) return Intrinsic::not_intrinsic; - if (!CB.onlyReadsMemory()) - return Intrinsic::not_intrinsic; - - // Otherwise check if we have a call to a function that can be turned into a - // vector intrinsic. 
switch (Func) { default: break; @@ -3368,13 +3366,28 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, switch (IID) { default: break; - case Intrinsic::maxnum: - return (isKnownNeverNaN(I->getOperand(0), TLI) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, - SignBitOnly, Depth + 1)) || - (isKnownNeverNaN(I->getOperand(1), TLI) && - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, - SignBitOnly, Depth + 1)); + case Intrinsic::maxnum: { + Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); + auto isPositiveNum = [&](Value *V) { + if (SignBitOnly) { + // With SignBitOnly, this is tricky because the result of + // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is + // a constant strictly greater than 0.0. + const APFloat *C; + return match(V, m_APFloat(C)) && + *C > APFloat::getZero(C->getSemantics()); + } + + // -0.0 compares equal to 0.0, so if this operand is at least -0.0, + // maxnum can't be ordered-less-than-zero. + return isKnownNeverNaN(V, TLI) && + cannotBeOrderedLessThanZeroImpl(V, TLI, false, Depth + 1); + }; + + // TODO: This could be improved. We could also check that neither operand + // has its sign bit set (and at least 1 is not-NAN?). 
+ return isPositiveNum(V0) || isPositiveNum(V1); + } case Intrinsic::maximum: return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, @@ -4652,31 +4665,30 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } -bool llvm::canCreatePoison(const Instruction *I) { +static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { // See whether I has flags that may create poison - if (isa(I) && - (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())) - return true; - if (isa(I) && I->isExact()) - return true; - if (auto *FP = dyn_cast(I)) { + if (const auto *OvOp = dyn_cast(Op)) { + if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap()) + return true; + } + if (const auto *ExactOp = dyn_cast(Op)) + if (ExactOp->isExact()) + return true; + if (const auto *FP = dyn_cast(Op)) { auto FMF = FP->getFastMathFlags(); if (FMF.noNaNs() || FMF.noInfs()) return true; } - if (auto *GEP = dyn_cast(I)) - if (GEP->isInBounds()) - return true; - unsigned Opcode = I->getOpcode(); + unsigned Opcode = Op->getOpcode(); - // Check whether opcode is a poison-generating operation + // Check whether opcode is a poison/undef-generating operation switch (Opcode) { case Instruction::Shl: case Instruction::AShr: case Instruction::LShr: { // Shifts return poison if shiftwidth is larger than the bitwidth. - if (auto *C = dyn_cast(I->getOperand(1))) { + if (auto *C = dyn_cast(Op->getOperand(1))) { SmallVector ShiftAmounts; if (auto *FVTy = dyn_cast(C->getType())) { unsigned NumElts = FVTy->getNumElements(); @@ -4702,41 +4714,62 @@ bool llvm::canCreatePoison(const Instruction *I) { return true; case Instruction::Call: case Instruction::CallBr: - case Instruction::Invoke: - // Function calls can return a poison value even if args are non-poison - // values. 
- return true; + case Instruction::Invoke: { + const auto *CB = cast(Op); + return !CB->hasRetAttr(Attribute::NoUndef); + } case Instruction::InsertElement: case Instruction::ExtractElement: { // If index exceeds the length of the vector, it returns poison - auto *VTy = cast(I->getOperand(0)->getType()); - unsigned IdxOp = I->getOpcode() == Instruction::InsertElement ? 2 : 1; - auto *Idx = dyn_cast(I->getOperand(IdxOp)); + auto *VTy = cast(Op->getOperand(0)->getType()); + unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; + auto *Idx = dyn_cast(Op->getOperand(IdxOp)); if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min) return true; return false; } + case Instruction::ShuffleVector: { + // shufflevector may return undef. + if (PoisonOnly) + return false; + ArrayRef Mask = isa(Op) + ? cast(Op)->getShuffleMask() + : cast(Op)->getShuffleMask(); + return any_of(Mask, [](int Elt) { return Elt == UndefMaskElem; }); + } case Instruction::FNeg: case Instruction::PHI: case Instruction::Select: case Instruction::URem: case Instruction::SRem: - case Instruction::ShuffleVector: case Instruction::ExtractValue: case Instruction::InsertValue: case Instruction::Freeze: case Instruction::ICmp: case Instruction::FCmp: - case Instruction::GetElementPtr: return false; - default: - if (isa(I)) + case Instruction::GetElementPtr: { + const auto *GEP = cast(Op); + return GEP->isInBounds(); + } + default: { + const auto *CE = dyn_cast(Op); + if (isa(Op) || (CE && CE->isCast())) return false; - else if (isa(I)) + else if (isa(Op)) return false; // Be conservative and return true. 
return true; } + } +} + +bool llvm::canCreateUndefOrPoison(const Operator *Op) { + return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false); +} + +bool llvm::canCreatePoison(const Operator *Op) { + return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true); } bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, @@ -4753,6 +4786,12 @@ bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, // TODO: Some instructions are guaranteed to return neither undef // nor poison if their arguments are not poison/undef. + if (auto *A = dyn_cast(V)) { + // NoUndef does not guarantee that paddings are not undef. + if (A->hasAttribute(Attribute::NoUndef)) + return true; + } + if (auto *C = dyn_cast(V)) { // TODO: We can analyze ConstExpr by opcode to determine if there is any // possibility of poison. diff --git a/llvm/lib/Analysis/models/inliner/saved_model.pb b/llvm/lib/Analysis/models/inliner/saved_model.pb deleted file mode 100644 index 5488989454f72..0000000000000 Binary files a/llvm/lib/Analysis/models/inliner/saved_model.pb and /dev/null differ diff --git a/llvm/lib/Analysis/models/inliner/saved_model.pbtxt b/llvm/lib/Analysis/models/inliner/saved_model.pbtxt new file mode 100644 index 0000000000000..ec522a8b7c353 --- /dev/null +++ b/llvm/lib/Analysis/models/inliner/saved_model.pbtxt @@ -0,0 +1,32634 @@ +saved_model_schema_version: 1 +meta_graphs { + meta_info_def { + stripped_op_list { + op { + name: "Const" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "value" + type: "tensor" + } + attr { + name: "dtype" + type: "type" + } + } + op { + name: "NoOp" + } + op { + name: "PartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { 
+ s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: "string" + default_value { + s: "" + } + } + } + op { + name: "Placeholder" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + default_value { + shape { + unknown_rank: true + } + } + } + } + op { + name: "ReadVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + output_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true + } + op { + name: "StatefulPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: "string" + default_value { + s: "" + } + } + is_stateful: true + } + op { + name: "VarHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "allowed_devices" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true + } + } + tags: "serve" + tensorflow_version: "1.15.0" + tensorflow_git_version: "unknown" + stripped_default_attrs: true + } + graph_def { + node { + name: "train_step" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + 
type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "train_step" + } + } + } + node { + name: "train_step/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "train_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense/kernel" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense/bias" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { 
+ key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense_1/kernel" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 40 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense_1/bias" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/dense_2/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/dense_2/kernel" + } + } + } + node { + name: "QNetwork/dense_2/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/dense_2/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + 
name: "QNetwork/dense_2/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/dense_2/bias" + } + } + } + node { + name: "QNetwork/dense_2/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/dense_2/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "NoOp" + op: "NoOp" + } + node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "\nu\n\023\010\001\022\017_time_step_spec\n\024\010\002\022\020_trajectory_spec\n\023\010\003\022\017_wrapped_policy\n\016\010\004\022\ntrain_step\n\023\010\005\022\017model_variables\n\016\010\006\022\nsignatures\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0013\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0011\n;\n\016\010\010\022\n_q_network\n\023\010\001\022\017_time_step_spec\n\024\010\t\022\020_trajectory_spec\nE\022C\n\016VARIABLE_VALUE\022\ntrain_step\032%train_step/.ATTRIBUTES/VARIABLE_VALUE\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\000\n\000\n\214\001\n\026\010\020\022\022_input_tensor_spec\n\014\010\021\022\010_encoder\n\022\010\022\022\016_q_value_layer\n\r\010\023\022\tvariables\n\031\010\024\022\025regularization_losses\n\027\010\025\022\023trainable_variables\n\r\010\026\022\tkeras_api\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0011\ng\022e\n\016VARIABLE_VALUE\022%QNet
work/EncodingNetwork/dense/kernel\032,model_variables/0/.ATTRIBUTES/VARIABLE_VALUE\ne\022c\n\016VARIABLE_VALUE\022#QNetwork/EncodingNetwork/dense/bias\032,model_variables/1/.ATTRIBUTES/VARIABLE_VALUE\ni\022g\n\016VARIABLE_VALUE\022\'QNetwork/EncodingNetwork/dense_1/kernel\032,model_variables/2/.ATTRIBUTES/VARIABLE_VALUE\ng\022e\n\016VARIABLE_VALUE\022%QNetwork/EncodingNetwork/dense_1/bias\032,model_variables/3/.ATTRIBUTES/VARIABLE_VALUE\nY\022W\n\016VARIABLE_VALUE\022\027QNetwork/dense_2/kernel\032,model_variables/4/.ATTRIBUTES/VARIABLE_VALUE\nW\022U\n\016VARIABLE_VALUE\022\025QNetwork/dense_2/bias\032,model_variables/5/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\334\001\n\026\010\027\022\022_input_tensor_spec\n\027\010\030\022\023_preprocessing_nest\n\036\010\031\022\032_flat_preprocessing_layers\n\033\010\032\022\027_preprocessing_combiner\n\032\010\033\022\026_postprocessing_layers\n\r\010\034\022\tvariables\n\031\010\035\022\025regularization_losses\n\027\010\036\022\023trainable_variables\n\r\010\037\022\tkeras_api\nh\n\n\010\016\022\006kernel\n\010\010\017\022\004bias\n\r\010 
\022\tvariables\n\031\010!\022\025regularization_losses\n\027\010\"\022\023trainable_variables\n\r\010#\022\tkeras_api\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\000\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\255\001\n\021\010$\022\rlayer_metrics\n\r\010\023\022\tvariables\n\037\010%\022\033layer_regularization_losses\n\013\010&\022\007metrics\n\n\010\'\022\006layers\n\031\010\024\022\025regularization_losses\n\033\010(\022\027non_trainable_variables\n\027\010\025\022\023trainable_variables\n\000\n\000\nV\n\005\010)\022\0010\n\005\010*\022\0011\n\005\010+\022\0012\n\005\010,\022\0013\n\005\010-\022\0014\n\005\010.\022\0015\n\005\010/\022\0016\n\005\0100\022\0017\n\005\0101\022\0018\n\005\0102\022\0019\n\006\0103\022\00210\n\006\0104\022\00211\nR\n\r\0105\022\tvariables\n\031\0106\022\025regularization_losses\n\027\0107\022\023trainable_variables\n\r\0108\022\tkeras_api\n\025\n\005\0109\022\0010\n\005\010:\022\0011\n\005\010;\022\0012\n\034\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\000\n\034\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\255\001\n\021\010<\022\rlayer_metrics\n\r\010\034\022\tvariables\n\037\010=\022\033layer_regularization_losses\n\013\010>\022\007metrics\n\n\010?\022\006layers\n\031\010\035\022\025regularization_losses\n\033\010@\022\027non_trainable_variables\n\027\010\036\022\023trainable_variables\n\016\n\005\010\016\022\0010\n\005\010\017\022\0011\n\000\n\016\n\005\010\016\022\0010\n\005\010\017\022\0011\n\255\001\n\021\010A\022\rlayer_metrics\n\r\010 
\022\tvariables\n\037\010B\022\033layer_regularization_losses\n\013\010C\022\007metrics\n\n\010D\022\006layers\n\031\010!\022\025regularization_losses\n\033\010E\022\027non_trainable_variables\n\027\010\"\022\023trainable_variables\n\000\n\000\n\000\n\016\n\005\010\021\022\0010\n\005\010\022\022\0011\n\000\nR\n\r\010F\022\tvariables\n\031\010G\022\025regularization_losses\n\027\010H\022\023trainable_variables\n\r\010I\022\tkeras_api\nR\n\r\010J\022\tvariables\n\031\010K\022\025regularization_losses\n\027\010L\022\023trainable_variables\n\r\010M\022\tkeras_api\nR\n\r\010N\022\tvariables\n\031\010O\022\025regularization_losses\n\027\010P\022\023trainable_variables\n\r\010Q\022\tkeras_api\nR\n\r\010R\022\tvariables\n\031\010S\022\025regularization_losses\n\027\010T\022\023trainable_variables\n\r\010U\022\tkeras_api\nR\n\r\010V\022\tvariables\n\031\010W\022\025regularization_losses\n\027\010X\022\023trainable_variables\n\r\010Y\022\tkeras_api\nR\n\r\010Z\022\tvariables\n\031\010[\022\025regularization_losses\n\027\010\\\022\023trainable_variables\n\r\010]\022\tkeras_api\nR\n\r\010^\022\tvariables\n\031\010_\022\025regularization_losses\n\027\010`\022\023trainable_variables\n\r\010a\022\tkeras_api\nR\n\r\010b\022\tvariables\n\031\010c\022\025regularization_losses\n\027\010d\022\023trainable_variables\n\r\010e\022\tkeras_api\nR\n\r\010f\022\tvariables\n\031\010g\022\025regularization_losses\n\027\010h\022\023trainable_variables\n\r\010i\022\tkeras_api\nR\n\r\010j\022\tvariables\n\031\010k\022\025regularization_losses\n\027\010l\022\023trainable_variables\n\r\010m\022\tkeras_api\nR\n\r\010n\022\tvariables\n\031\010o\022\025regularization_losses\n\027\010p\022\023trainable_variables\n\r\010q\022\tkeras_api\nR\n\r\010r\022\tvariables\n\031\010s\022\025regularization_losses\n\027\010t\022\023trainable_variables\n\r\010u\022\tkeras_api\n\000\n\000\n\000\n\255\001\n\021\010v\022\rlayer_metrics\n\r\0105\022\tvariables\n\037\010w\022\033layer_regularization_losses\n\013\010x\022\
007metrics\n\n\010y\022\006layers\n\031\0106\022\025regularization_losses\n\033\010z\022\027non_trainable_variables\n\027\0107\022\023trainable_variables\nR\n\r\010{\022\tvariables\n\031\010|\022\025regularization_losses\n\027\010}\022\023trainable_variables\n\r\010~\022\tkeras_api\nk\n\n\010\n\022\006kernel\n\010\010\013\022\004bias\n\r\010\177\022\tvariables\n\032\010\200\001\022\025regularization_losses\n\030\010\201\001\022\023trainable_variables\n\016\010\202\001\022\tkeras_api\nl\n\n\010\014\022\006kernel\n\010\010\r\022\004bias\n\016\010\203\001\022\tvariables\n\032\010\204\001\022\025regularization_losses\n\030\010\205\001\022\023trainable_variables\n\016\010\206\001\022\tkeras_api\n\000\n\000\n\000\nv\n\005\010)\022\0010\n\005\010*\022\0011\n\005\010+\022\0012\n\005\010,\022\0013\n\005\010-\022\0014\n\005\010.\022\0015\n\005\010/\022\0016\n\005\0100\022\0017\n\005\0101\022\0018\n\005\0102\022\0019\n\006\0103\022\00210\n\006\0104\022\00211\n\006\010\032\022\00212\n\006\0109\022\00213\n\006\010:\022\00214\n\006\010;\022\00215\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\262\001\n\022\010\207\001\022\rlayer_metrics\n\r\010F\022\tvariables\n \010\210\001\022\033layer_regularization_losses\n\014\010\211\001\022\007metrics\n\013\010\212\001\022\006layers\n\031\010G\022\025regularization_losses\n\034\010\213\001\022\027non_trainable_variables\n\027\010H\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\214\001\022\rlayer_metrics\n\r\010J\022\tvariables\n \010\215\001\022\033layer_regularization_losses\n\014\010\216\001\022\007metrics\n\013\010\217\001\022\006layers\n\031\010K\022\025regularization_losses\n\034\010\220\001\022\027non_trainable_variables\n\027\010L\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\221\001\022\rlayer_metrics\n\r\010N\022\tvariables\n 
\010\222\001\022\033layer_regularization_losses\n\014\010\223\001\022\007metrics\n\013\010\224\001\022\006layers\n\031\010O\022\025regularization_losses\n\034\010\225\001\022\027non_trainable_variables\n\027\010P\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\226\001\022\rlayer_metrics\n\r\010R\022\tvariables\n \010\227\001\022\033layer_regularization_losses\n\014\010\230\001\022\007metrics\n\013\010\231\001\022\006layers\n\031\010S\022\025regularization_losses\n\034\010\232\001\022\027non_trainable_variables\n\027\010T\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\233\001\022\rlayer_metrics\n\r\010V\022\tvariables\n \010\234\001\022\033layer_regularization_losses\n\014\010\235\001\022\007metrics\n\013\010\236\001\022\006layers\n\031\010W\022\025regularization_losses\n\034\010\237\001\022\027non_trainable_variables\n\027\010X\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\240\001\022\rlayer_metrics\n\r\010Z\022\tvariables\n \010\241\001\022\033layer_regularization_losses\n\014\010\242\001\022\007metrics\n\013\010\243\001\022\006layers\n\031\010[\022\025regularization_losses\n\034\010\244\001\022\027non_trainable_variables\n\027\010\\\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\245\001\022\rlayer_metrics\n\r\010^\022\tvariables\n \010\246\001\022\033layer_regularization_losses\n\014\010\247\001\022\007metrics\n\013\010\250\001\022\006layers\n\031\010_\022\025regularization_losses\n\034\010\251\001\022\027non_trainable_variables\n\027\010`\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\252\001\022\rlayer_metrics\n\r\010b\022\tvariables\n \010\253\001\022\033layer_regularization_losses\n\014\010\254\001\022\007metrics\n\013\010\255\001\022\006layers\n\031\010c\022\025regularization_losses\n\034\010\256\001\022\027non_trainable_variables\n\027\010d\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\257\001\022\rlayer_metrics\n\r\010f\022\tvariables\n 
\010\260\001\022\033layer_regularization_losses\n\014\010\261\001\022\007metrics\n\013\010\262\001\022\006layers\n\031\010g\022\025regularization_losses\n\034\010\263\001\022\027non_trainable_variables\n\027\010h\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\264\001\022\rlayer_metrics\n\r\010j\022\tvariables\n \010\265\001\022\033layer_regularization_losses\n\014\010\266\001\022\007metrics\n\013\010\267\001\022\006layers\n\031\010k\022\025regularization_losses\n\034\010\270\001\022\027non_trainable_variables\n\027\010l\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\271\001\022\rlayer_metrics\n\r\010n\022\tvariables\n \010\272\001\022\033layer_regularization_losses\n\014\010\273\001\022\007metrics\n\013\010\274\001\022\006layers\n\031\010o\022\025regularization_losses\n\034\010\275\001\022\027non_trainable_variables\n\027\010p\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\276\001\022\rlayer_metrics\n\r\010r\022\tvariables\n \010\277\001\022\033layer_regularization_losses\n\014\010\300\001\022\007metrics\n\013\010\301\001\022\006layers\n\031\010s\022\025regularization_losses\n\034\010\302\001\022\027non_trainable_variables\n\027\010t\022\023trainable_variables\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\262\001\n\022\010\303\001\022\rlayer_metrics\n\r\010{\022\tvariables\n \010\304\001\022\033layer_regularization_losses\n\014\010\305\001\022\007metrics\n\013\010\306\001\022\006layers\n\031\010|\022\025regularization_losses\n\034\010\307\001\022\027non_trainable_variables\n\027\010}\022\023trainable_variables\n\016\n\005\010\n\022\0010\n\005\010\013\022\0011\n\000\n\016\n\005\010\n\022\0010\n\005\010\013\022\0011\n\264\001\n\022\010\310\001\022\rlayer_metrics\n\r\010\177\022\tvariables\n 
\010\311\001\022\033layer_regularization_losses\n\014\010\312\001\022\007metrics\n\013\010\313\001\022\006layers\n\032\010\200\001\022\025regularization_losses\n\034\010\314\001\022\027non_trainable_variables\n\030\010\201\001\022\023trainable_variables\n\016\n\005\010\014\022\0010\n\005\010\r\022\0011\n\000\n\016\n\005\010\014\022\0010\n\005\010\r\022\0011\n\265\001\n\022\010\315\001\022\rlayer_metrics\n\016\010\203\001\022\tvariables\n \010\316\001\022\033layer_regularization_losses\n\014\010\317\001\022\007metrics\n\013\010\320\001\022\006layers\n\032\010\204\001\022\025regularization_losses\n\034\010\321\001\022\027non_trainable_variables\n\030\010\205\001\022\023trainable_variables\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000" + } + } + } + } + node { + name: "action_callee_basic_block_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callee_conditionally_executed_blocks" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callee_users" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_basic_block_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_conditionally_executed_blocks" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_users" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callsite_height" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_cost_estimate" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_discount" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_edge_count" + op: "Placeholder" + attr 
{ + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_inlining_default" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_node_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_nr_ctant_params" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_reward" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_step_type" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "action_callee_basic_block_count" + input: "action_callee_conditionally_executed_blocks" + input: "action_callee_users" + input: "action_caller_basic_block_count" + input: 
"action_caller_conditionally_executed_blocks" + input: "action_caller_users" + input: "action_callsite_height" + input: "action_cost_estimate" + input: "action_discount" + input: "action_edge_count" + input: "action_inlining_default" + input: "action_node_count" + input: "action_nr_ctant_params" + input: "action_reward" + input: "action_step_type" + input: "QNetwork/EncodingNetwork/dense/kernel" + input: "QNetwork/EncodingNetwork/dense/bias" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + input: "QNetwork/EncodingNetwork/dense_1/bias" + input: "QNetwork/dense_2/kernel" + input: "QNetwork/dense_2/bias" + attr { + key: "Tin" + value { + list { + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619026" + } + } + } + } + node { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: 
"\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619033" + } + } + } + } + node { + name: "StatefulPartitionedCall_1" + op: "StatefulPartitionedCall" + input: "train_step" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619048" + } + } + } + } + node { + name: "saver_filename" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } + } + node { + name: "StatefulPartitionedCall_2" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "train_step/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/kernel/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/bias/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/kernel/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/bias/Read/ReadVariableOp" + input: "QNetwork/dense_2/kernel/Read/ReadVariableOp" + input: "QNetwork/dense_2/bias/Read/ReadVariableOp" + input: "Const" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_STRING + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: 
"_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_save_4619143" + } + } + } + } + node { + name: "StatefulPartitionedCall_3" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "train_step" + input: "QNetwork/EncodingNetwork/dense/kernel" + input: "QNetwork/EncodingNetwork/dense/bias" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + input: "QNetwork/EncodingNetwork/dense_1/bias" + input: "QNetwork/dense_2/kernel" + input: "QNetwork/dense_2/bias" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_restore_4619176" + } + } + } + } + library { + function { + signature { + name: "__inference_signature_wrapper_4619048" + input_arg { + name: "unknown" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "unknown" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + 
attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4619040" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4619029" + } + node_def { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_722" + } + } + } + 
experimental_debug_info { + original_node_names: "PartitionedCall" + } + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference_action_931" + input_arg { + name: "time_step" + type: DT_INT32 + } + input_arg { + name: "time_step_1" + type: DT_FLOAT + } + input_arg { + name: "time_step_2" + type: DT_FLOAT + } + input_arg { + name: "time_step_3" + type: DT_INT64 + } + input_arg { + name: "time_step_4" + type: DT_INT64 + } + input_arg { + name: "time_step_5" + type: DT_INT64 + } + input_arg { + name: "time_step_6" + type: DT_INT64 + } + input_arg { + name: "time_step_7" + type: DT_INT64 + } + input_arg { + name: "time_step_8" + type: DT_INT64 + } + input_arg { + name: "time_step_9" + type: DT_INT64 + } + input_arg { + name: "time_step_10" + type: DT_INT64 + } + input_arg { + name: "time_step_11" + type: DT_INT64 + } + input_arg { + name: "time_step_12" + type: DT_INT64 + } + input_arg { + name: "time_step_13" + type: DT_INT64 + } + input_arg { + name: "time_step_14" + type: DT_INT64 + } + input_arg { + name: "qnetwork_encodingnetwork_dense_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_1_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_1_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_dense_2_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_dense_2_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + 
attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_3" + input: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 
+ f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 
1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + 
f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 
+ f: 10 + f: 10 + f: 11 + f: 12 + f: 13 + f: 14 + f: 14 + f: 14 + f: 16 + f: 17 + f: 19 + f: 23 + f: 27 + f: 39 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Sqrt" + } 
+ } + node_def { + name: "QNetwork/EncodingNetwork/lambda/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim" + } + } + node_def { + 
name: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_4" + input: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 
+ f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 
0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 10 + f: 10 + f: 10 + f: 12 + f: 12 + f: 12 + f: 14 + f: 14 + f: 18 + f: 20 + f: 23 + f: 30 + f: 41 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Bucketize" + } + } + 
node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_1/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_1/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_1/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + attr { + 
key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_1/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_5" + input: 
"QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 
4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + 
f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 29 + f: 29 + f: 29 + f: 29 + f: 30 + f: 30 + f: 31 + f: 31 + f: 31 + f: 31 + f: 32 + f: 32 + f: 33 + f: 33 + f: 33 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 35 + f: 36 + f: 36 + f: 37 + f: 37 + f: 37 + f: 38 + f: 38 + f: 39 + f: 39 + f: 40 + f: 40 + f: 41 + f: 41 + f: 41 + f: 42 + f: 43 + f: 43 + f: 44 + f: 44 + f: 45 + f: 45 + f: 46 + f: 46 + f: 46 + f: 47 + f: 47 + f: 48 + f: 49 + f: 49 + f: 50 + f: 50 + f: 51 + f: 52 + f: 53 + f: 53 + f: 54 + f: 55 + f: 56 + f: 57 + f: 57 + f: 58 + f: 59 + f: 60 + f: 61 + f: 61 + f: 63 + f: 63 + f: 64 + f: 65 + f: 66 + f: 67 + f: 67 + f: 69 + f: 70 + f: 71 + f: 72 + f: 73 + f: 74 + f: 75 + f: 77 + f: 78 + f: 79 + f: 80 + f: 81 + f: 82 + f: 83 + f: 85 + f: 86 + f: 88 + f: 89 + f: 91 + f: 92 + f: 94 + f: 96 + f: 97 + f: 99 + f: 100 + f: 101 + f: 103 + f: 105 + f: 107 + f: 109 + f: 111 + f: 113 + f: 115 + f: 118 + f: 121 + f: 123 + f: 126 + f: 128 + f: 130 + f: 133 + f: 135 + f: 137 + f: 140 + f: 143 + f: 146 + f: 148 + f: 151 + f: 154 + f: 157 + f: 161 + f: 163 + f: 166 + f: 169 + f: 173 + f: 178 + f: 183 + f: 189 + f: 193 + f: 197 + f: 202 + f: 208 + f: 213 + f: 218 + f: 223 + f: 228 + f: 233 + f: 239 + f: 245 + f: 250 + f: 257 + f: 262 + f: 269 + f: 277 + f: 284 + f: 292 + f: 300 + f: 308 + f: 319 + f: 329 + f: 340 + f: 349 + f: 359 + f: 371 + f: 382 + f: 394 + f: 410 + f: 423 + f: 435 + f: 445 + f: 462 + f: 480 + f: 492 + f: 506 + f: 519 + f: 536 + f: 557 + f: 577 + f: 598 + f: 622 + f: 655 + f: 679 + f: 707 + f: 733 + f: 751 + f: 787 + f: 814 + f: 
847 + f: 897 + f: 934 + f: 997 + f: 1062 + f: 1111 + f: 1181 + f: 1275 + f: 1385 + f: 1465 + f: 1603 + f: 1769 + f: 2057 + f: 2257 + f: 2803 + f: 3468 + f: 4417 + f: 6538 + f: 16126 + f: 23446 + f: 33536 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_2/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_2/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_2/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 
1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_2/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + 
} + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_6" + input: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + 
f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 
+ f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 
8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 28 + f: 29 + f: 29 + f: 29 + f: 29 + f: 30 + f: 30 + f: 30 + f: 31 + f: 31 + f: 31 + f: 32 + f: 32 + f: 32 + f: 33 + f: 33 + f: 33 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 35 + f: 35 + f: 36 + f: 36 + f: 36 + f: 37 + f: 37 + f: 37 + f: 38 + f: 38 + f: 38 + f: 38 + f: 39 + f: 39 + f: 40 + f: 40 + f: 41 + f: 41 + f: 42 + f: 43 + f: 43 + f: 44 + f: 45 + f: 45 + f: 46 + f: 47 + f: 47 + f: 48 + f: 49 + f: 49 + f: 50 + f: 50 + f: 52 + f: 52 + f: 53 + f: 54 + f: 55 + f: 55 + f: 57 + f: 58 + f: 59 + f: 60 + f: 62 + f: 64 + f: 65 + f: 66 + f: 68 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 
71 + f: 73 + f: 75 + f: 76 + f: 78 + f: 81 + f: 84 + f: 86 + f: 90 + f: 94 + f: 98 + f: 101 + f: 106 + f: 111 + f: 117 + f: 123 + f: 130 + f: 138 + f: 146 + f: 157 + f: 163 + f: 176 + f: 187 + f: 198 + f: 214 + f: 227 + f: 252 + f: 280 + f: 327 + f: 395 + f: 506 + f: 671 + f: 1025 + f: 1971 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_3/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_3/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_3/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_3/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: 
"value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_7" + input: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 
+ f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 
6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 28 + f: 28 + f: 28 + f: 28 + f: 28 + f: 29 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 31 + f: 32 + f: 32 + f: 32 + f: 32 + f: 32 + f: 34 + f: 34 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 36 + f: 36 + f: 36 + f: 37 + f: 38 + f: 38 + f: 38 + f: 39 + f: 40 + f: 40 + f: 41 + f: 42 + f: 42 + f: 43 + f: 44 + f: 44 + f: 46 + f: 46 + f: 47 + f: 48 + f: 48 + f: 50 + f: 50 + f: 52 + f: 52 + f: 54 + f: 55 + f: 55 + f: 56 + f: 57 + f: 58 + f: 60 + f: 60 + f: 60 + 
f: 60 + f: 60 + f: 60 + f: 62 + f: 62 + f: 64 + f: 65 + f: 66 + f: 68 + f: 70 + f: 72 + f: 74 + f: 77 + f: 80 + f: 82 + f: 86 + f: 89 + f: 92 + f: 96 + f: 99 + f: 104 + f: 108 + f: 114 + f: 119 + f: 125 + f: 131 + f: 139 + f: 146 + f: 157 + f: 167 + f: 176 + f: 188 + f: 198 + f: 215 + f: 236 + f: 262 + f: 306 + f: 376 + f: 462 + f: 596 + f: 942 + f: 1428 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_4/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_4/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_4/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + 
attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_4/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_8" + input: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + 
f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 
2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + 
f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 11 + f: 11 + f: 12 + f: 13 + f: 14 + f: 15 + f: 16 + f: 18 + f: 20 + f: 23 + f: 29 + f: 38 + f: 60 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_5/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_5/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_5/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + 
experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_5/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + 
experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_9" + input: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 
6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 
10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 
24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 28 + f: 29 + f: 29 + f: 30 + f: 30 + f: 30 + f: 31 + f: 31 + f: 32 + f: 32 + f: 33 + f: 33 + f: 34 + f: 35 + f: 37 + f: 38 + f: 40 + f: 46 + f: 51 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_6/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_6/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_6/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_6/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { 
+ key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_10" + input: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: -15035 + f: -15030 + f: -15025 + f: -15000 + f: -14985 + f: -14945 + f: -14745 + f: -70 + f: -55 + f: -55 + f: -50 + f: -50 + f: -50 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: 
-35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: 
-35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: 
-25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 15 + f: 
15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 55 + f: 55 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 75 + f: 75 + f: 80 + f: 80 + f: 80 + f: 85 + f: 85 + f: 85 + f: 90 + f: 90 + f: 90 + f: 90 + f: 95 + f: 95 + f: 100 + f: 100 + f: 105 + f: 110 + f: 115 + f: 120 + f: 125 + f: 125 + f: 130 + f: 140 + f: 140 + f: 145 + f: 150 + f: 155 + f: 160 + f: 160 + f: 165 + f: 170 + f: 175 + f: 180 + f: 190 + f: 200 + f: 210 + f: 215 + f: 220 + f: 220 + f: 230 + f: 235 + f: 245 + f: 250 + f: 260 + f: 275 + f: 290 + f: 305 + f: 325 + f: 350 + f: 370 + f: 390 + f: 425 + f: 460 + f: 500 + f: 560 + f: 650 + f: 790 + f: 1025 + f: 1600 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_7/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + 
} + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_7/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_7/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info 
{ + original_node_names: "QNetwork/EncodingNetwork/lambda_7/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_7/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_11" + input: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + 
experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 18 + f: 29 + f: 39 + f: 48 + f: 57 + f: 64 + f: 70 + f: 76 + f: 82 + f: 87 + f: 92 + f: 97 + f: 101 + f: 105 + f: 109 + f: 113 + f: 116 + f: 120 + f: 123 + f: 127 + f: 130 + f: 134 + f: 137 + f: 140 + f: 143 + f: 146 + f: 149 + f: 152 + f: 156 + f: 159 + f: 162 + f: 165 + f: 168 + f: 171 + f: 174 + f: 177 + f: 180 + f: 183 + f: 186 + f: 188 + f: 191 + f: 194 + f: 197 + f: 200 + f: 203 + f: 205 + f: 208 + f: 211 + f: 214 + f: 217 + f: 219 + f: 222 + f: 225 + f: 228 + f: 231 + f: 233 + f: 236 + f: 239 + f: 242 + f: 244 + f: 247 + f: 250 + f: 253 + f: 255 + f: 258 + f: 261 + f: 264 + f: 266 + f: 269 + f: 272 + f: 275 + f: 278 + f: 280 + f: 283 + f: 286 + f: 289 + f: 292 + f: 294 + f: 297 + f: 300 + f: 303 + f: 305 + f: 308 + f: 311 + f: 314 + f: 317 + f: 319 + f: 322 + f: 325 + f: 327 + f: 330 + f: 333 + f: 336 + f: 339 + f: 341 + f: 344 + f: 347 + f: 350 + f: 353 + f: 355 + f: 358 + f: 361 + f: 364 + f: 367 + f: 370 + f: 373 + f: 375 + f: 378 + f: 381 + f: 384 + f: 387 + f: 390 + f: 393 + f: 396 + f: 399 + f: 401 + f: 404 + f: 407 + f: 410 + f: 413 + f: 416 + f: 419 + f: 422 + f: 425 + f: 428 + f: 431 + f: 434 + f: 437 + f: 440 + f: 443 + f: 446 + f: 449 + f: 452 + f: 455 + f: 458 + f: 461 + f: 464 + f: 467 + f: 470 + f: 473 + f: 476 + f: 479 + f: 483 + f: 486 + f: 489 + f: 492 + f: 495 + f: 498 + f: 501 + f: 504 + f: 507 + f: 511 + f: 514 + f: 517 + f: 520 + f: 523 + f: 526 + f: 530 + f: 533 + f: 536 + f: 539 + f: 542 + f: 545 + f: 549 + f: 552 + f: 555 + f: 558 + f: 
562 + f: 565 + f: 569 + f: 572 + f: 575 + f: 579 + f: 582 + f: 585 + f: 589 + f: 592 + f: 595 + f: 599 + f: 602 + f: 605 + f: 609 + f: 612 + f: 616 + f: 620 + f: 623 + f: 626 + f: 630 + f: 634 + f: 637 + f: 641 + f: 644 + f: 648 + f: 651 + f: 655 + f: 658 + f: 662 + f: 665 + f: 669 + f: 672 + f: 676 + f: 680 + f: 683 + f: 687 + f: 691 + f: 694 + f: 698 + f: 702 + f: 705 + f: 709 + f: 712 + f: 716 + f: 720 + f: 724 + f: 727 + f: 731 + f: 735 + f: 739 + f: 742 + f: 746 + f: 750 + f: 754 + f: 758 + f: 761 + f: 765 + f: 769 + f: 773 + f: 777 + f: 780 + f: 784 + f: 788 + f: 792 + f: 796 + f: 800 + f: 804 + f: 808 + f: 812 + f: 816 + f: 820 + f: 823 + f: 828 + f: 832 + f: 836 + f: 840 + f: 844 + f: 848 + f: 852 + f: 856 + f: 860 + f: 864 + f: 868 + f: 873 + f: 877 + f: 881 + f: 885 + f: 889 + f: 893 + f: 897 + f: 902 + f: 906 + f: 910 + f: 914 + f: 919 + f: 923 + f: 927 + f: 931 + f: 935 + f: 940 + f: 944 + f: 948 + f: 953 + f: 957 + f: 962 + f: 966 + f: 970 + f: 975 + f: 979 + f: 984 + f: 988 + f: 993 + f: 997 + f: 1002 + f: 1006 + f: 1011 + f: 1015 + f: 1020 + f: 1024 + f: 1029 + f: 1034 + f: 1038 + f: 1043 + f: 1047 + f: 1052 + f: 1057 + f: 1062 + f: 1066 + f: 1071 + f: 1076 + f: 1081 + f: 1086 + f: 1090 + f: 1095 + f: 1100 + f: 1105 + f: 1110 + f: 1114 + f: 1119 + f: 1124 + f: 1129 + f: 1134 + f: 1139 + f: 1144 + f: 1149 + f: 1154 + f: 1159 + f: 1164 + f: 1169 + f: 1174 + f: 1179 + f: 1184 + f: 1189 + f: 1194 + f: 1199 + f: 1204 + f: 1209 + f: 1215 + f: 1220 + f: 1225 + f: 1230 + f: 1235 + f: 1241 + f: 1246 + f: 1251 + f: 1257 + f: 1262 + f: 1267 + f: 1273 + f: 1278 + f: 1284 + f: 1289 + f: 1294 + f: 1300 + f: 1305 + f: 1311 + f: 1316 + f: 1322 + f: 1327 + f: 1333 + f: 1338 + f: 1344 + f: 1350 + f: 1355 + f: 1361 + f: 1367 + f: 1372 + f: 1378 + f: 1383 + f: 1389 + f: 1395 + f: 1401 + f: 1407 + f: 1413 + f: 1418 + f: 1424 + f: 1430 + f: 1436 + f: 1442 + f: 1448 + f: 1454 + f: 1459 + f: 1465 + f: 1472 + f: 1477 + f: 1483 + f: 1489 + f: 1495 + f: 1501 + f: 1507 + f: 
1514 + f: 1520 + f: 1526 + f: 1532 + f: 1538 + f: 1545 + f: 1551 + f: 1557 + f: 1564 + f: 1570 + f: 1576 + f: 1583 + f: 1589 + f: 1596 + f: 1602 + f: 1608 + f: 1615 + f: 1621 + f: 1628 + f: 1634 + f: 1641 + f: 1647 + f: 1654 + f: 1661 + f: 1667 + f: 1674 + f: 1681 + f: 1687 + f: 1694 + f: 1701 + f: 1708 + f: 1715 + f: 1722 + f: 1729 + f: 1735 + f: 1742 + f: 1749 + f: 1756 + f: 1763 + f: 1770 + f: 1777 + f: 1784 + f: 1791 + f: 1798 + f: 1806 + f: 1812 + f: 1820 + f: 1827 + f: 1835 + f: 1841 + f: 1849 + f: 1856 + f: 1863 + f: 1871 + f: 1878 + f: 1885 + f: 1893 + f: 1901 + f: 1908 + f: 1915 + f: 1923 + f: 1930 + f: 1938 + f: 1946 + f: 1953 + f: 1961 + f: 1969 + f: 1976 + f: 1984 + f: 1992 + f: 2000 + f: 2007 + f: 2015 + f: 2023 + f: 2031 + f: 2039 + f: 2047 + f: 2055 + f: 2063 + f: 2071 + f: 2079 + f: 2087 + f: 2095 + f: 2104 + f: 2112 + f: 2120 + f: 2128 + f: 2137 + f: 2146 + f: 2154 + f: 2162 + f: 2171 + f: 2179 + f: 2188 + f: 2197 + f: 2205 + f: 2214 + f: 2223 + f: 2232 + f: 2241 + f: 2250 + f: 2258 + f: 2268 + f: 2277 + f: 2285 + f: 2294 + f: 2304 + f: 2313 + f: 2322 + f: 2331 + f: 2340 + f: 2350 + f: 2359 + f: 2368 + f: 2378 + f: 2388 + f: 2397 + f: 2407 + f: 2416 + f: 2426 + f: 2436 + f: 2446 + f: 2455 + f: 2465 + f: 2475 + f: 2485 + f: 2495 + f: 2505 + f: 2515 + f: 2525 + f: 2535 + f: 2545 + f: 2556 + f: 2566 + f: 2577 + f: 2587 + f: 2598 + f: 2609 + f: 2620 + f: 2631 + f: 2641 + f: 2652 + f: 2663 + f: 2674 + f: 2685 + f: 2696 + f: 2708 + f: 2719 + f: 2730 + f: 2742 + f: 2753 + f: 2764 + f: 2776 + f: 2788 + f: 2799 + f: 2811 + f: 2823 + f: 2835 + f: 2847 + f: 2858 + f: 2870 + f: 2882 + f: 2894 + f: 2906 + f: 2919 + f: 2931 + f: 2943 + f: 2956 + f: 2968 + f: 2981 + f: 2994 + f: 3006 + f: 3019 + f: 3032 + f: 3045 + f: 3058 + f: 3070 + f: 3083 + f: 3096 + f: 3109 + f: 3121 + f: 3134 + f: 3148 + f: 3161 + f: 3174 + f: 3187 + f: 3200 + f: 3214 + f: 3228 + f: 3242 + f: 3255 + f: 3268 + f: 3283 + f: 3297 + f: 3310 + f: 3325 + f: 3340 + f: 3353 + f: 3368 + f: 3383 + f: 
3398 + f: 3412 + f: 3427 + f: 3442 + f: 3457 + f: 3471 + f: 3487 + f: 3502 + f: 3516 + f: 3531 + f: 3546 + f: 3561 + f: 3577 + f: 3593 + f: 3608 + f: 3625 + f: 3641 + f: 3657 + f: 3673 + f: 3690 + f: 3706 + f: 3722 + f: 3738 + f: 3755 + f: 3772 + f: 3789 + f: 3805 + f: 3823 + f: 3839 + f: 3856 + f: 3873 + f: 3891 + f: 3908 + f: 3926 + f: 3944 + f: 3960 + f: 3977 + f: 3995 + f: 4013 + f: 4031 + f: 4048 + f: 4067 + f: 4085 + f: 4104 + f: 4122 + f: 4140 + f: 4159 + f: 4177 + f: 4196 + f: 4215 + f: 4234 + f: 4253 + f: 4272 + f: 4291 + f: 4311 + f: 4332 + f: 4351 + f: 4371 + f: 4391 + f: 4412 + f: 4433 + f: 4454 + f: 4474 + f: 4496 + f: 4518 + f: 4538 + f: 4558 + f: 4579 + f: 4601 + f: 4619 + f: 4640 + f: 4662 + f: 4684 + f: 4706 + f: 4728 + f: 4751 + f: 4771 + f: 4794 + f: 4818 + f: 4840 + f: 4863 + f: 4887 + f: 4910 + f: 4933 + f: 4956 + f: 4980 + f: 5004 + f: 5028 + f: 5052 + f: 5076 + f: 5100 + f: 5125 + f: 5152 + f: 5175 + f: 5200 + f: 5226 + f: 5251 + f: 5278 + f: 5304 + f: 5329 + f: 5354 + f: 5381 + f: 5407 + f: 5433 + f: 5460 + f: 5488 + f: 5516 + f: 5544 + f: 5573 + f: 5600 + f: 5628 + f: 5656 + f: 5684 + f: 5713 + f: 5741 + f: 5771 + f: 5799 + f: 5830 + f: 5860 + f: 5891 + f: 5921 + f: 5951 + f: 5980 + f: 6010 + f: 6041 + f: 6073 + f: 6105 + f: 6133 + f: 6163 + f: 6195 + f: 6227 + f: 6258 + f: 6291 + f: 6322 + f: 6356 + f: 6390 + f: 6424 + f: 6457 + f: 6491 + f: 6527 + f: 6561 + f: 6596 + f: 6631 + f: 6665 + f: 6701 + f: 6736 + f: 6771 + f: 6805 + f: 6840 + f: 6877 + f: 6911 + f: 6947 + f: 6985 + f: 7022 + f: 7059 + f: 7097 + f: 7135 + f: 7174 + f: 7212 + f: 7251 + f: 7289 + f: 7327 + f: 7366 + f: 7406 + f: 7447 + f: 7486 + f: 7525 + f: 7566 + f: 7606 + f: 7646 + f: 7688 + f: 7728 + f: 7771 + f: 7814 + f: 7859 + f: 7901 + f: 7949 + f: 7992 + f: 8036 + f: 8082 + f: 8127 + f: 8173 + f: 8218 + f: 8262 + f: 8309 + f: 8353 + f: 8397 + f: 8444 + f: 8489 + f: 8539 + f: 8585 + f: 8632 + f: 8682 + f: 8727 + f: 8777 + f: 8828 + f: 8879 + f: 8929 + f: 8982 + f: 9037 + f: 
9087 + f: 9140 + f: 9193 + f: 9250 + f: 9305 + f: 9361 + f: 9418 + f: 9475 + f: 9532 + f: 9589 + f: 9644 + f: 9699 + f: 9758 + f: 9818 + f: 9875 + f: 9935 + f: 9997 + f: 10057 + f: 10117 + f: 10174 + f: 10232 + f: 10296 + f: 10356 + f: 10419 + f: 10482 + f: 10546 + f: 10608 + f: 10670 + f: 10729 + f: 10790 + f: 10855 + f: 10920 + f: 10990 + f: 11054 + f: 11118 + f: 11181 + f: 11248 + f: 11316 + f: 11385 + f: 11454 + f: 11526 + f: 11597 + f: 11667 + f: 11740 + f: 11820 + f: 11897 + f: 11973 + f: 12046 + f: 12126 + f: 12204 + f: 12287 + f: 12370 + f: 12456 + f: 12538 + f: 12627 + f: 12714 + f: 12799 + f: 12883 + f: 12971 + f: 13062 + f: 13154 + f: 13233 + f: 13328 + f: 13418 + f: 13511 + f: 13607 + f: 13709 + f: 13806 + f: 13903 + f: 14002 + f: 14104 + f: 14200 + f: 14288 + f: 14391 + f: 14488 + f: 14590 + f: 14698 + f: 14808 + f: 14910 + f: 15020 + f: 15126 + f: 15238 + f: 15347 + f: 15456 + f: 15574 + f: 15692 + f: 15786 + f: 15896 + f: 16016 + f: 16136 + f: 16250 + f: 16352 + f: 16474 + f: 16575 + f: 16702 + f: 16835 + f: 16965 + f: 17096 + f: 17232 + f: 17370 + f: 17443 + f: 17581 + f: 17719 + f: 17864 + f: 17976 + f: 18116 + f: 18250 + f: 18396 + f: 18540 + f: 18690 + f: 18840 + f: 18989 + f: 19136 + f: 19294 + f: 19445 + f: 19589 + f: 19750 + f: 19905 + f: 20064 + f: 20191 + f: 20325 + f: 20497 + f: 20662 + f: 20833 + f: 20981 + f: 21152 + f: 21334 + f: 21510 + f: 21642 + f: 21821 + f: 22001 + f: 22186 + f: 22379 + f: 22568 + f: 22770 + f: 22958 + f: 23162 + f: 23360 + f: 23524 + f: 23737 + f: 23960 + f: 24175 + f: 24395 + f: 24631 + f: 24865 + f: 25091 + f: 25327 + f: 25580 + f: 25833 + f: 26089 + f: 26361 + f: 26636 + f: 26889 + f: 27155 + f: 27436 + f: 27715 + f: 28003 + f: 28303 + f: 28600 + f: 28916 + f: 29223 + f: 29553 + f: 29884 + f: 30200 + f: 30538 + f: 30868 + f: 31211 + f: 31548 + f: 31881 + f: 32253 + f: 32605 + f: 32980 + f: 33385 + f: 33805 + f: 34254 + f: 34723 + f: 35167 + f: 35666 + f: 36125 + f: 36652 + f: 37177 + f: 37739 + f: 38321 + f: 
38932 + f: 39640 + f: 40337 + f: 41000 + f: 41626 + f: 42385 + f: 43122 + f: 43890 + f: 44687 + f: 45609 + f: 46520 + f: 47489 + f: 48432 + f: 49458 + f: 50511 + f: 51561 + f: 52568 + f: 53676 + f: 54936 + f: 56071 + f: 57302 + f: 58513 + f: 59800 + f: 61192 + f: 62702 + f: 64205 + f: 65868 + f: 67780 + f: 69960 + f: 72330 + f: 74918 + f: 77540 + f: 80344 + f: 83727 + f: 87662 + f: 93589 + f: 101441 + f: 110544 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_8/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_8/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_8/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Sqrt" + op: "Sqrt" 
+ input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_8/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" 
+ value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_12" + input: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/zeros_like" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + float_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/zeros_like" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_13" + input: 
"QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 13 + f: 38 + f: 56 + f: 70 + f: 82 + f: 94 + f: 104 + f: 114 + f: 123 + f: 131 + f: 139 + f: 148 + f: 152 + f: 153 + f: 158 + f: 163 + f: 170 + f: 174 + f: 178 + f: 180 + f: 183 + f: 186 + f: 188 + f: 190 + f: 192 + f: 196 + f: 198 + f: 201 + f: 205 + f: 208 + f: 212 + f: 215 + f: 219 + f: 221 + f: 225 + f: 227 + f: 229 + f: 232 + f: 233 + f: 236 + f: 239 + f: 242 + f: 245 + f: 248 + f: 250 + f: 252 + f: 254 + f: 256 + f: 259 + f: 261 + f: 264 + f: 267 + f: 270 + f: 272 + f: 275 + f: 278 + f: 280 + f: 283 + f: 285 + f: 287 + f: 290 + f: 293 + f: 295 + f: 297 + f: 300 + f: 303 + f: 305 + f: 308 + f: 311 + f: 313 + f: 316 + f: 319 + f: 322 + f: 325 + f: 329 + f: 331 + f: 333 + f: 336 + f: 338 + f: 340 + f: 343 + f: 345 + f: 347 + f: 347 + f: 349 + f: 351 + f: 353 + f: 355 + f: 357 + f: 359 + f: 361 + f: 363 + f: 365 + f: 368 + f: 369 + f: 371 + f: 373 + f: 375 + f: 377 + f: 380 + f: 382 + f: 385 + f: 387 + f: 389 + f: 391 + f: 394 + f: 396 + f: 398 + f: 400 + f: 403 + f: 405 + f: 408 + f: 410 + f: 412 + f: 415 + f: 417 + f: 420 + f: 422 + f: 425 + f: 427 + f: 429 + f: 432 + f: 434 + f: 437 + f: 439 + f: 442 + f: 444 + f: 446 + f: 449 + f: 451 + f: 454 + f: 456 + f: 458 + f: 461 + f: 463 + f: 466 + f: 469 + f: 472 + f: 
474 + f: 476 + f: 479 + f: 482 + f: 483 + f: 486 + f: 489 + f: 492 + f: 495 + f: 498 + f: 500 + f: 503 + f: 505 + f: 508 + f: 510 + f: 513 + f: 516 + f: 519 + f: 522 + f: 524 + f: 528 + f: 530 + f: 533 + f: 536 + f: 539 + f: 541 + f: 544 + f: 547 + f: 550 + f: 553 + f: 556 + f: 559 + f: 561 + f: 563 + f: 567 + f: 570 + f: 572 + f: 575 + f: 577 + f: 580 + f: 584 + f: 586 + f: 589 + f: 592 + f: 595 + f: 598 + f: 601 + f: 605 + f: 607 + f: 611 + f: 613 + f: 617 + f: 620 + f: 623 + f: 626 + f: 629 + f: 632 + f: 635 + f: 639 + f: 642 + f: 645 + f: 648 + f: 651 + f: 654 + f: 657 + f: 660 + f: 662 + f: 666 + f: 669 + f: 672 + f: 676 + f: 679 + f: 682 + f: 685 + f: 688 + f: 690 + f: 693 + f: 696 + f: 699 + f: 702 + f: 705 + f: 709 + f: 712 + f: 714 + f: 718 + f: 721 + f: 724 + f: 726 + f: 728 + f: 729 + f: 731 + f: 734 + f: 737 + f: 741 + f: 745 + f: 748 + f: 750 + f: 753 + f: 756 + f: 760 + f: 763 + f: 766 + f: 770 + f: 773 + f: 776 + f: 779 + f: 782 + f: 786 + f: 788 + f: 793 + f: 796 + f: 798 + f: 802 + f: 805 + f: 808 + f: 811 + f: 815 + f: 818 + f: 820 + f: 824 + f: 827 + f: 829 + f: 832 + f: 835 + f: 838 + f: 842 + f: 846 + f: 849 + f: 854 + f: 857 + f: 860 + f: 864 + f: 867 + f: 871 + f: 875 + f: 879 + f: 882 + f: 887 + f: 890 + f: 893 + f: 897 + f: 901 + f: 905 + f: 908 + f: 911 + f: 915 + f: 918 + f: 921 + f: 925 + f: 929 + f: 932 + f: 934 + f: 937 + f: 940 + f: 943 + f: 946 + f: 950 + f: 953 + f: 956 + f: 961 + f: 965 + f: 969 + f: 973 + f: 976 + f: 980 + f: 982 + f: 985 + f: 990 + f: 994 + f: 997 + f: 1001 + f: 1005 + f: 1007 + f: 1010 + f: 1014 + f: 1018 + f: 1022 + f: 1025 + f: 1028 + f: 1033 + f: 1035 + f: 1038 + f: 1042 + f: 1047 + f: 1052 + f: 1056 + f: 1060 + f: 1063 + f: 1067 + f: 1071 + f: 1075 + f: 1079 + f: 1083 + f: 1086 + f: 1088 + f: 1092 + f: 1097 + f: 1102 + f: 1106 + f: 1109 + f: 1113 + f: 1117 + f: 1120 + f: 1125 + f: 1129 + f: 1134 + f: 1137 + f: 1142 + f: 1146 + f: 1150 + f: 1151 + f: 1155 + f: 1159 + f: 1162 + f: 1166 + f: 1170 + f: 1174 + f: 
1177 + f: 1181 + f: 1185 + f: 1188 + f: 1193 + f: 1196 + f: 1203 + f: 1207 + f: 1212 + f: 1214 + f: 1217 + f: 1220 + f: 1222 + f: 1222 + f: 1226 + f: 1229 + f: 1233 + f: 1237 + f: 1241 + f: 1246 + f: 1250 + f: 1253 + f: 1257 + f: 1262 + f: 1267 + f: 1272 + f: 1278 + f: 1283 + f: 1287 + f: 1293 + f: 1297 + f: 1301 + f: 1304 + f: 1309 + f: 1315 + f: 1320 + f: 1325 + f: 1329 + f: 1333 + f: 1336 + f: 1341 + f: 1344 + f: 1348 + f: 1351 + f: 1357 + f: 1363 + f: 1368 + f: 1374 + f: 1379 + f: 1383 + f: 1386 + f: 1391 + f: 1395 + f: 1399 + f: 1403 + f: 1407 + f: 1410 + f: 1415 + f: 1418 + f: 1423 + f: 1428 + f: 1432 + f: 1436 + f: 1438 + f: 1442 + f: 1446 + f: 1450 + f: 1454 + f: 1462 + f: 1467 + f: 1472 + f: 1477 + f: 1483 + f: 1488 + f: 1492 + f: 1496 + f: 1503 + f: 1508 + f: 1513 + f: 1518 + f: 1520 + f: 1526 + f: 1531 + f: 1534 + f: 1538 + f: 1542 + f: 1546 + f: 1552 + f: 1558 + f: 1564 + f: 1568 + f: 1573 + f: 1578 + f: 1581 + f: 1590 + f: 1596 + f: 1601 + f: 1606 + f: 1611 + f: 1616 + f: 1622 + f: 1629 + f: 1634 + f: 1640 + f: 1647 + f: 1651 + f: 1657 + f: 1660 + f: 1665 + f: 1672 + f: 1678 + f: 1686 + f: 1692 + f: 1698 + f: 1704 + f: 1709 + f: 1714 + f: 1719 + f: 1724 + f: 1730 + f: 1737 + f: 1744 + f: 1751 + f: 1755 + f: 1761 + f: 1764 + f: 1772 + f: 1778 + f: 1784 + f: 1789 + f: 1799 + f: 1804 + f: 1811 + f: 1819 + f: 1825 + f: 1830 + f: 1838 + f: 1849 + f: 1858 + f: 1862 + f: 1868 + f: 1872 + f: 1878 + f: 1885 + f: 1888 + f: 1892 + f: 1897 + f: 1902 + f: 1907 + f: 1919 + f: 1926 + f: 1932 + f: 1936 + f: 1941 + f: 1946 + f: 1952 + f: 1960 + f: 1968 + f: 1977 + f: 1985 + f: 1992 + f: 1997 + f: 2006 + f: 2012 + f: 2018 + f: 2026 + f: 2034 + f: 2044 + f: 2050 + f: 2057 + f: 2064 + f: 2069 + f: 2075 + f: 2082 + f: 2091 + f: 2098 + f: 2107 + f: 2122 + f: 2126 + f: 2135 + f: 2146 + f: 2149 + f: 2157 + f: 2163 + f: 2172 + f: 2178 + f: 2184 + f: 2191 + f: 2198 + f: 2208 + f: 2216 + f: 2223 + f: 2235 + f: 2242 + f: 2252 + f: 2263 + f: 2272 + f: 2277 + f: 2288 + f: 2296 + f: 
2306 + f: 2311 + f: 2318 + f: 2323 + f: 2334 + f: 2341 + f: 2356 + f: 2366 + f: 2373 + f: 2379 + f: 2386 + f: 2407 + f: 2416 + f: 2423 + f: 2432 + f: 2438 + f: 2448 + f: 2453 + f: 2464 + f: 2473 + f: 2473 + f: 2481 + f: 2492 + f: 2504 + f: 2511 + f: 2523 + f: 2529 + f: 2537 + f: 2545 + f: 2556 + f: 2566 + f: 2575 + f: 2584 + f: 2592 + f: 2602 + f: 2613 + f: 2624 + f: 2636 + f: 2643 + f: 2647 + f: 2652 + f: 2664 + f: 2675 + f: 2688 + f: 2693 + f: 2702 + f: 2709 + f: 2722 + f: 2739 + f: 2754 + f: 2766 + f: 2776 + f: 2786 + f: 2799 + f: 2810 + f: 2832 + f: 2840 + f: 2849 + f: 2860 + f: 2873 + f: 2889 + f: 2908 + f: 2914 + f: 2926 + f: 2939 + f: 2950 + f: 2961 + f: 2969 + f: 2978 + f: 2990 + f: 2999 + f: 3023 + f: 3032 + f: 3049 + f: 3066 + f: 3085 + f: 3101 + f: 3107 + f: 3117 + f: 3129 + f: 3144 + f: 3167 + f: 3190 + f: 3212 + f: 3229 + f: 3238 + f: 3264 + f: 3293 + f: 3302 + f: 3309 + f: 3314 + f: 3323 + f: 3344 + f: 3352 + f: 3362 + f: 3390 + f: 3400 + f: 3411 + f: 3435 + f: 3456 + f: 3470 + f: 3485 + f: 3498 + f: 3505 + f: 3519 + f: 3539 + f: 3545 + f: 3545 + f: 3560 + f: 3576 + f: 3597 + f: 3607 + f: 3621 + f: 3641 + f: 3665 + f: 3679 + f: 3701 + f: 3714 + f: 3733 + f: 3741 + f: 3745 + f: 3757 + f: 3773 + f: 3787 + f: 3795 + f: 3805 + f: 3822 + f: 3835 + f: 3844 + f: 3861 + f: 3872 + f: 3878 + f: 3897 + f: 3919 + f: 3941 + f: 3971 + f: 4004 + f: 4014 + f: 4019 + f: 4061 + f: 4068 + f: 4089 + f: 4108 + f: 4117 + f: 4125 + f: 4146 + f: 4165 + f: 4194 + f: 4204 + f: 4224 + f: 4236 + f: 4263 + f: 4290 + f: 4301 + f: 4319 + f: 4326 + f: 4347 + f: 4369 + f: 4386 + f: 4413 + f: 4435 + f: 4451 + f: 4451 + f: 4451 + f: 4476 + f: 4500 + f: 4539 + f: 4579 + f: 4592 + f: 4600 + f: 4622 + f: 4650 + f: 4683 + f: 4714 + f: 4742 + f: 4755 + f: 4771 + f: 4788 + f: 4816 + f: 4828 + f: 4831 + f: 4831 + f: 4831 + f: 4843 + f: 4852 + f: 4865 + f: 4896 + f: 4915 + f: 4931 + f: 4952 + f: 4965 + f: 4983 + f: 5007 + f: 5043 + f: 5061 + f: 5081 + f: 5095 + f: 5122 + f: 5143 + f: 5171 + f: 
5204 + f: 5226 + f: 5233 + f: 5250 + f: 5281 + f: 5320 + f: 5323 + f: 5328 + f: 5345 + f: 5374 + f: 5413 + f: 5466 + f: 5492 + f: 5524 + f: 5555 + f: 5567 + f: 5610 + f: 5676 + f: 5701 + f: 5716 + f: 5744 + f: 5768 + f: 5795 + f: 5818 + f: 5854 + f: 5906 + f: 5934 + f: 5960 + f: 5975 + f: 5993 + f: 6025 + f: 6034 + f: 6051 + f: 6082 + f: 6106 + f: 6125 + f: 6159 + f: 6187 + f: 6242 + f: 6287 + f: 6311 + f: 6332 + f: 6348 + f: 6358 + f: 6368 + f: 6377 + f: 6402 + f: 6407 + f: 6428 + f: 6450 + f: 6475 + f: 6498 + f: 6505 + f: 6533 + f: 6565 + f: 6580 + f: 6595 + f: 6611 + f: 6654 + f: 6658 + f: 6705 + f: 6751 + f: 6786 + f: 6828 + f: 6876 + f: 6896 + f: 6948 + f: 6964 + f: 7065 + f: 7082 + f: 7118 + f: 7184 + f: 7214 + f: 7271 + f: 7310 + f: 7357 + f: 7405 + f: 7506 + f: 7613 + f: 7641 + f: 7675 + f: 7720 + f: 7781 + f: 7833 + f: 7860 + f: 7898 + f: 7929 + f: 8044 + f: 8104 + f: 8148 + f: 8236 + f: 8273 + f: 8313 + f: 8349 + f: 8381 + f: 8409 + f: 8498 + f: 8507 + f: 8524 + f: 8570 + f: 8607 + f: 8630 + f: 8637 + f: 8675 + f: 8700 + f: 8714 + f: 8734 + f: 8776 + f: 8836 + f: 8854 + f: 8867 + f: 8868 + f: 9065 + f: 9113 + f: 9121 + f: 9241 + f: 9357 + f: 9360 + f: 9585 + f: 9613 + f: 9684 + f: 9727 + f: 9751 + f: 9777 + f: 9802 + f: 9889 + f: 9903 + f: 9914 + f: 9978 + f: 10061 + f: 10192 + f: 10213 + f: 10345 + f: 10369 + f: 10404 + f: 10430 + f: 10471 + f: 10481 + f: 10489 + f: 10492 + f: 10494 + f: 10524 + f: 10554 + f: 10557 + f: 10560 + f: 10562 + f: 10641 + f: 10716 + f: 10842 + f: 10897 + f: 10967 + f: 11053 + f: 11128 + f: 11137 + f: 11328 + f: 11336 + f: 11401 + f: 11532 + f: 11573 + f: 11860 + f: 11880 + f: 12013 + f: 12305 + f: 12358 + f: 12386 + f: 12404 + f: 12456 + f: 12456 + f: 12476 + f: 12615 + f: 12677 + f: 12981 + f: 13094 + f: 13197 + f: 13708 + f: 13717 + f: 13788 + f: 14049 + f: 14112 + f: 14224 + f: 14257 + f: 14681 + f: 14901 + f: 15006 + f: 15071 + f: 15100 + f: 15248 + f: 15669 + f: 15877 + f: 15953 + f: 15953 + f: 16066 + f: 16072 + f: 16271 
+ f: 16292 + f: 16386 + f: 16490 + f: 16633 + f: 16670 + f: 16834 + f: 16896 + f: 17543 + f: 17693 + f: 17800 + f: 17859 + f: 18397 + f: 18811 + f: 18826 + f: 18971 + f: 19304 + f: 19319 + f: 19695 + f: 20378 + f: 20865 + f: 21313 + f: 21330 + f: 22321 + f: 22760 + f: 22770 + f: 23783 + f: 23785 + f: 24525 + f: 24844 + f: 24848 + f: 24964 + f: 24966 + f: 27468 + f: 27478 + f: 27555 + f: 27555 + f: 28215 + f: 28219 + f: 28336 + f: 28490 + f: 30213 + f: 30228 + f: 30242 + f: 34116 + f: 43518 + f: 43518 + f: 43518 + f: 43852 + f: 43852 + f: 43852 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_10/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_10/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_10/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + 
original_node_names: "QNetwork/EncodingNetwork/lambda_10/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_10/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: 
"QNetwork/EncodingNetwork/lambda_10/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_14" + input: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 
+ f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 
0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 
+ f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 4 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_11/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_11/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_11/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Sqrt" + op: 
"Sqrt" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_11/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/concatenate/concat/axis" + op: "Const" + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/concatenate/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/concatenate/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_1/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_2/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_3/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_4/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_5/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_6/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_7/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_8/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_9/zeros_like:output:0" + input: "QNetwork/EncodingNetwork/lambda_10/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_11/concat:output:0" + input: "QNetwork/EncodingNetwork/concatenate/concat/axis:output:0" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 34 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/concatenate/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/flatten/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\"\000\000\000" + } + } + } + experimental_debug_info { + 
original_node_names: "QNetwork/EncodingNetwork/flatten/Const" + } + } + node_def { + name: "QNetwork/EncodingNetwork/flatten/Reshape" + op: "Reshape" + input: "QNetwork/EncodingNetwork/concatenate/concat:output:0" + input: "QNetwork/EncodingNetwork/flatten/Const:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 34 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/flatten/Reshape" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/flatten/Reshape:output:0" + input: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/MatMul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/dense/BiasAdd" + op: "BiasAdd" + input: "QNetwork/EncodingNetwork/dense/MatMul:product:0" + input: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/BiasAdd" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/Relu" + op: "Relu" + input: "QNetwork/EncodingNetwork/dense/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/Relu" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_1_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/dense/Relu:activations:0" + input: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/MatMul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: 
"qnetwork_encodingnetwork_dense_1_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/BiasAdd" + op: "BiasAdd" + input: "QNetwork/EncodingNetwork/dense_1/MatMul:product:0" + input: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/BiasAdd" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/Relu" + op: "Relu" + input: "QNetwork/EncodingNetwork/dense_1/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/Relu" + } + } + node_def { + name: "QNetwork/dense_2/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_dense_2_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/dense_2/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/dense_1/Relu:activations:0" + input: "QNetwork/dense_2/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } 
+ dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/MatMul" + } + } + node_def { + name: "QNetwork/dense_2/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_dense_2_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/dense_2/BiasAdd" + op: "BiasAdd" + input: "QNetwork/dense_2/MatMul:product:0" + input: "QNetwork/dense_2/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/BiasAdd" + } + } + node_def { + name: "ShiftedCategorical_1/mode/ArgMax/dimension" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "ShiftedCategorical_1/mode/ArgMax/dimension" + } + } + node_def { + name: "ShiftedCategorical_1/mode/ArgMax" + op: "ArgMax" + input: "QNetwork/dense_2/BiasAdd:output:0" + input: "ShiftedCategorical_1/mode/ArgMax/dimension:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "ShiftedCategorical_1/mode/ArgMax" + } + } + node_def { + name: "add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "add/y" + } + } + node_def { + name: "add" + op: "AddV2" + input: "ShiftedCategorical_1/mode/ArgMax:output:0" + input: "add/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "add" + } + } + node_def { + name: "Deterministic/atol" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic/atol" + } + } + node_def { + name: "Deterministic/rtol" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic/rtol" + } + } + node_def { + name: "Deterministic_1/sample/sample_shape/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/sample_shape/x" + } + } + node_def { + name: "Deterministic_1/sample/sample_shape" + op: "Cast" + input: "Deterministic_1/sample/sample_shape/x:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/sample_shape" + } + } + node_def { + name: "Deterministic_1/sample/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape" + } + } + node_def { + name: "Deterministic_1/sample/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_1" + } + } + node_def { + name: "Deterministic_1/sample/Shape_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_2" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastArgs" + op: "BroadcastArgs" + input: "Deterministic_1/sample/Shape_1:output:0" + input: "Deterministic_1/sample/Shape_2:output:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastArgs" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastArgs_1" + op: "BroadcastArgs" + input: "Deterministic_1/sample/Shape:output:0" + input: "Deterministic_1/sample/BroadcastArgs:r0:0" + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastArgs_1" + } + } + node_def { + name: "Deterministic_1/sample/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Const" + } + } + node_def { + name: "Deterministic_1/sample/concat/values_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat/values_0" + } + } + node_def { + name: "Deterministic_1/sample/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat/axis" + } + } + node_def { + name: "Deterministic_1/sample/concat" + op: "ConcatV2" + input: "Deterministic_1/sample/concat/values_0:output:0" + input: "Deterministic_1/sample/BroadcastArgs_1:r0:0" + input: "Deterministic_1/sample/Const:output:0" + input: "Deterministic_1/sample/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { 
+ original_node_names: "Deterministic_1/sample/concat" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastTo" + op: "BroadcastTo" + input: "add:z:0" + input: "Deterministic_1/sample/concat:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastTo" + } + } + node_def { + name: "Deterministic_1/sample/Shape_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\001\000\000\000" + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_3" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack_1" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack_2" + op: 
"Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack_2" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice" + op: "StridedSlice" + input: "Deterministic_1/sample/Shape_3:output:0" + input: "Deterministic_1/sample/strided_slice/stack:output:0" + input: "Deterministic_1/sample/strided_slice/stack_1:output:0" + input: "Deterministic_1/sample/strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice" + } + } + node_def { + name: "Deterministic_1/sample/concat_1/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat_1/axis" + } + } + node_def { + name: "Deterministic_1/sample/concat_1" + op: "ConcatV2" + input: "Deterministic_1/sample/sample_shape:y:0" + input: "Deterministic_1/sample/strided_slice:output:0" + input: "Deterministic_1/sample/concat_1/axis:output:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: 
"Deterministic_1/sample/concat_1" + } + } + node_def { + name: "Deterministic_1/sample/Reshape" + op: "Reshape" + input: "Deterministic_1/sample/BroadcastTo:output:0" + input: "Deterministic_1/sample/concat_1:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Reshape" + } + } + node_def { + name: "clip_by_value/Minimum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/Minimum/y" + } + } + node_def { + name: "clip_by_value/Minimum" + op: "Minimum" + input: "Deterministic_1/sample/Reshape:output:0" + input: "clip_by_value/Minimum/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/Minimum" + } + } + node_def { + name: "clip_by_value/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/y" + } + } + node_def { + name: "clip_by_value" + op: "Maximum" + input: "clip_by_value/Minimum:z:0" + input: "clip_by_value/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value" + } + } + node_def 
{ + name: "Identity" + op: "Identity" + input: "clip_by_value:z:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: 
"time_step" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 13 + value { + attr { 
+ key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_signature_wrapper_4619033" + } + node_def { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4619029" + } + } + } + experimental_debug_info { + original_node_names: "PartitionedCall" + } + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference__traced_save_4619143" + 
input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "savev2_train_step_read_readvariableop" + type: DT_INT64 + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_1_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_1_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_dense_2_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_dense_2_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_1_const" + type: DT_STRING + } + output_arg { + name: "identity_1" + type: DT_STRING + } + is_stateful: true + control_output: "MergeV2Checkpoints" + control_output: "SaveV2" + control_output: "SaveV2_1" + } + node_def { + name: "StaticRegexFullMatch" + op: "StaticRegexFullMatch" + input: "file_prefix" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "pattern" + value { + s: "^s3://.*" + } + } + experimental_debug_info { + original_node_names: "StaticRegexFullMatch" + } + } + node_def { + name: "Const" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: ".part" + } + } + } + experimental_debug_info { + original_node_names: "Const" + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + 
dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_f4c8d2e64931472295be68a11e57e937/part" + } + } + } + experimental_debug_info { + original_node_names: "Const_1" + } + } + node_def { + name: "Select" + op: "Select" + input: "StaticRegexFullMatch:output:0" + input: "Const:output:0" + input: "Const_1:output:0" + device: "/device:CPU:*" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Select" + } + } + node_def { + name: "StringJoin" + op: "StringJoin" + input: "file_prefix" + input: "Select:output:0" + device: "/device:CPU:*" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "StringJoin" + } + } + node_def { + name: "num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "num_shards" + } + } + node_def { + name: "ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename/shard" + } + } + node_def { + name: "ShardedFilename" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename" + } + } 
+ node_def { + name: "SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "train_step/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/0/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/1/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/2/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/3/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/4/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/5/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/tensor_names" + } + } + node_def { + name: "SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/shape_and_slices" + } + } + node_def { + name: "SaveV2" + op: "SaveV2" + input: "ShardedFilename:filename:0" + input: "SaveV2/tensor_names:output:0" + input: "SaveV2/shape_and_slices:output:0" + input: "savev2_train_step_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_kernel_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_bias_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_1_kernel_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_1_bias_read_readvariableop" + input: 
"savev2_qnetwork_dense_2_kernel_read_readvariableop" + input: "savev2_qnetwork_dense_2_bias_read_readvariableop" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "SaveV2" + } + } + node_def { + name: "ShardedFilename_1/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1/shard" + } + } + node_def { + name: "ShardedFilename_1" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename_1/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1" + } + } + node_def { + name: "SaveV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/tensor_names" + } + } + node_def { + name: "SaveV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/shape_and_slices" + } + } + node_def { + name: "SaveV2_1" + op: "SaveV2" + input: "ShardedFilename_1:filename:0" + input: "SaveV2_1/tensor_names:output:0" + input: "SaveV2_1/shape_and_slices:output:0" + input: "savev2_1_const" + input: "^SaveV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1" + } + } + node_def { + name: "MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "ShardedFilename:filename:0" + input: "ShardedFilename_1:filename:0" + input: "^SaveV2" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints/checkpoint_prefixes" + } + } + node_def { + name: "MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "MergeV2Checkpoints/checkpoint_prefixes:output:0" + input: "file_prefix" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "file_prefix" + input: "^MergeV2Checkpoints" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "Identity:output:0" + input: "^MergeV2Checkpoints" + input: "^SaveV2" + input: "^SaveV2_1" + attr { + 
key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + ret { + key: "identity_1" + value: "Identity_1:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + } + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + shape { + dim { + size: 40 + } + } + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + shape { + dim { + size: 2 + } + } + shape { + } + } + } + } + control_ret { + key: "MergeV2Checkpoints" + value: "MergeV2Checkpoints" + } + control_ret { + key: "SaveV2" + value: "SaveV2" + } + control_ret { + key: "SaveV2_1" + value: "SaveV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_722" + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference_signature_wrapper_4619026" + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: 
"callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4618993" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: 
"_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: 
"caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + 
} + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4618993" + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: 
DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + 
value { + func { + name: "__inference_polymorphic_action_fn_4618978" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_4619080" + input_arg { + name: "time_step_step_type" + type: DT_INT32 + } + input_arg { + name: "time_step_reward" + type: DT_FLOAT + } + input_arg { + name: "time_step_discount" + type: DT_FLOAT + } + input_arg { + name: "time_step_observation_callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callee_conditionally_executed_blocks" + type: DT_INT64 + } + 
input_arg { + name: "time_step_observation_callee_users" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_users" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callsite_height" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_cost_estimate" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_edge_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_inlining_default" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_node_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "time_step_step_type" + input: "time_step_reward" + input: "time_step_discount" + input: "time_step_observation_callee_basic_block_count" + input: "time_step_observation_callee_conditionally_executed_blocks" + input: "time_step_observation_callee_users" + input: "time_step_observation_caller_basic_block_count" + input: "time_step_observation_caller_conditionally_executed_blocks" + input: "time_step_observation_caller_users" + input: "time_step_observation_callsite_height" + input: "time_step_observation_cost_estimate" + input: "time_step_observation_edge_count" + input: 
"time_step_observation_inlining_default" + input: "time_step_observation_node_count" + input: "time_step_observation_nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape 
{ + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: 
"_user_specified_name" + value { + s: "time_step/observation/callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: 
"time_step/observation/node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4619040" + input_arg { + name: "unknown" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "unknown" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__728" + } + } + } + experimental_debug_info { + 
original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_4618978" + input_arg { + name: "time_step" + type: DT_INT32 + } + input_arg { + name: "time_step_1" + type: DT_FLOAT + } + input_arg { + name: "time_step_2" + type: DT_FLOAT + } + input_arg { + name: "time_step_3" + type: DT_INT64 + } + input_arg { + name: "time_step_4" + type: DT_INT64 + } + input_arg { + name: "time_step_5" + type: DT_INT64 + } + input_arg { + name: "time_step_6" + type: DT_INT64 + } + input_arg { + name: "time_step_7" + type: DT_INT64 + } + input_arg { + name: "time_step_8" + type: DT_INT64 + } + input_arg { + name: "time_step_9" + type: DT_INT64 + } + input_arg { + name: "time_step_10" + type: DT_INT64 + } + input_arg { + name: "time_step_11" + type: DT_INT64 + } + input_arg { + name: "time_step_12" + type: DT_INT64 + } + input_arg { + name: "time_step_13" + type: DT_INT64 + } + input_arg { + name: "time_step_14" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + 
name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "time_step" + input: "time_step_1" + input: "time_step_2" + input: "time_step_3" + input: "time_step_4" + input: "time_step_5" + input: "time_step_6" + input: "time_step_7" + input: "time_step_8" + input: "time_step_9" + input: "time_step_10" + input: "time_step_11" + input: "time_step_12" + input: "time_step_13" + input: "time_step_14" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 4 + 
value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { 
+ dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_946" + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 
+ } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { 
+ s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + 
arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: 
"cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__traced_restore_4619176" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "assignvariableop_train_step" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_1_qnetwork_encodingnetwork_dense_kernel" + type: DT_RESOURCE + } + input_arg { + name: 
"assignvariableop_2_qnetwork_encodingnetwork_dense_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_3_qnetwork_encodingnetwork_dense_1_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_4_qnetwork_encodingnetwork_dense_1_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_5_qnetwork_dense_2_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_6_qnetwork_dense_2_bias" + type: DT_RESOURCE + } + output_arg { + name: "identity_8" + type: DT_STRING + } + is_stateful: true + control_output: "AssignVariableOp" + control_output: "AssignVariableOp_1" + control_output: "AssignVariableOp_2" + control_output: "AssignVariableOp_3" + control_output: "AssignVariableOp_4" + control_output: "AssignVariableOp_5" + control_output: "AssignVariableOp_6" + control_output: "RestoreV2" + control_output: "RestoreV2_1" + } + node_def { + name: "RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "train_step/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/0/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/1/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/2/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/3/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/4/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/5/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/tensor_names" + } + } + node_def { + name: "RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING 
+ } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/shape_and_slices" + } + } + node_def { + name: "RestoreV2" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2/tensor_names:output:0" + input: "RestoreV2/shape_and_slices:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "RestoreV2:tensors:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "assignvariableop_train_step" + input: "Identity:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "RestoreV2:tensors:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + 
original_node_names: "Identity_1" + } + } + node_def { + name: "AssignVariableOp_1" + op: "AssignVariableOp" + input: "assignvariableop_1_qnetwork_encodingnetwork_dense_kernel" + input: "Identity_1:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_1" + } + } + node_def { + name: "Identity_2" + op: "Identity" + input: "RestoreV2:tensors:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_2" + } + } + node_def { + name: "AssignVariableOp_2" + op: "AssignVariableOp" + input: "assignvariableop_2_qnetwork_encodingnetwork_dense_bias" + input: "Identity_2:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_2" + } + } + node_def { + name: "Identity_3" + op: "Identity" + input: "RestoreV2:tensors:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_3" + } + } + node_def { + name: "AssignVariableOp_3" + op: "AssignVariableOp" + input: "assignvariableop_3_qnetwork_encodingnetwork_dense_1_kernel" + input: "Identity_3:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_3" + } + } + node_def { + name: "Identity_4" + op: "Identity" + input: "RestoreV2:tensors:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + 
} + experimental_debug_info { + original_node_names: "Identity_4" + } + } + node_def { + name: "AssignVariableOp_4" + op: "AssignVariableOp" + input: "assignvariableop_4_qnetwork_encodingnetwork_dense_1_bias" + input: "Identity_4:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_4" + } + } + node_def { + name: "Identity_5" + op: "Identity" + input: "RestoreV2:tensors:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_5" + } + } + node_def { + name: "AssignVariableOp_5" + op: "AssignVariableOp" + input: "assignvariableop_5_qnetwork_dense_2_kernel" + input: "Identity_5:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_5" + } + } + node_def { + name: "Identity_6" + op: "Identity" + input: "RestoreV2:tensors:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_6" + } + } + node_def { + name: "AssignVariableOp_6" + op: "AssignVariableOp" + input: "assignvariableop_6_qnetwork_dense_2_bias" + input: "Identity_6:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_6" + } + } + node_def { + name: "RestoreV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: 
DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/tensor_names" + } + } + node_def { + name: "RestoreV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/shape_and_slices" + } + } + node_def { + name: "RestoreV2_1" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2_1/tensor_names:output:0" + input: "RestoreV2_1/shape_and_slices:output:0" + input: "^RestoreV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1" + } + } + node_def { + name: "NoOp" + op: "NoOp" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + experimental_debug_info { + original_node_names: "NoOp" + } + } + node_def { + name: "Identity_7" + op: "Identity" + input: "file_prefix" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^NoOp" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_7" + } + } + node_def { + name: "Identity_8" + op: "Identity" + 
input: "Identity_7:output:0" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^RestoreV2" + input: "^RestoreV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_8" + } + } + ret { + key: "identity_8" + value: "Identity_8:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "AssignVariableOp" + value: "AssignVariableOp" + } + control_ret { + key: "AssignVariableOp_1" + value: "AssignVariableOp_1" + } + control_ret { + key: "AssignVariableOp_2" + value: "AssignVariableOp_2" + } + control_ret { + key: "AssignVariableOp_3" + value: "AssignVariableOp_3" + } + control_ret { + key: "AssignVariableOp_4" + value: "AssignVariableOp_4" + } + control_ret { + key: "AssignVariableOp_5" + value: "AssignVariableOp_5" + } + control_ret { + key: "AssignVariableOp_6" + value: "AssignVariableOp_6" + } + control_ret { + key: "RestoreV2" + value: "RestoreV2" + } + control_ret { + key: "RestoreV2_1" + value: "RestoreV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 
+ value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__728" + input_arg { + name: "readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + } + node_def { + name: "ReadVariableOp" + op: "ReadVariableOp" + input: "readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + experimental_debug_info { + original_node_names: "ReadVariableOp" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + } + versions { + producer: 357 + min_consumer: 12 + } + } + saver_def { + filename_tensor_name: "saver_filename:0" + save_tensor_name: "StatefulPartitionedCall_2:0" + restore_op_name: "StatefulPartitionedCall_3" + version: V2 + } + collection_def { + key: "saved_model_main_op" + value { + node_list { + value: "NoOp" + } + } + } + signature_def 
{ + key: "__saved_model_init_op" + value { + outputs { + key: "__saved_model_init_op" + value { + name: "NoOp" + tensor_shape { + unknown_rank: true + } + } + } + } + } + signature_def { + key: "action" + value { + inputs { + key: "callee_basic_block_count" + value { + name: "action_callee_basic_block_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callee_conditionally_executed_blocks" + value { + name: "action_callee_conditionally_executed_blocks:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callee_users" + value { + name: "action_callee_users:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_basic_block_count" + value { + name: "action_caller_basic_block_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_conditionally_executed_blocks" + value { + name: "action_caller_conditionally_executed_blocks:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_users" + value { + name: "action_caller_users:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callsite_height" + value { + name: "action_callsite_height:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "cost_estimate" + value { + name: "action_cost_estimate:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "discount" + value { + name: "action_discount:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "edge_count" + value { + name: "action_edge_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "inlining_default" + value { + name: "action_inlining_default:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "node_count" + value { + name: 
"action_node_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "nr_ctant_params" + value { + name: "action_nr_ctant_params:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "reward" + value { + name: "action_reward:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "step_type" + value { + name: "action_step_type:0" + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + } + } + outputs { + key: "inlining_decision" + value { + name: "StatefulPartitionedCall:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + method_name: "tensorflow/serving/predict" + } + } + signature_def { + key: "get_initial_state" + value { + method_name: "tensorflow/serving/predict" + } + } + signature_def { + key: "get_train_step" + value { + outputs { + key: "int64" + value { + name: "StatefulPartitionedCall_1:0" + dtype: DT_INT64 + tensor_shape { + } + } + } + method_name: "tensorflow/serving/predict" + } + } + object_graph_def { + nodes { + children { + node_id: 1 + local_name: "_time_step_spec" + } + children { + node_id: 2 + local_name: "_trajectory_spec" + } + children { + node_id: 3 + local_name: "_wrapped_policy" + } + children { + node_id: 4 + local_name: "train_step" + } + children { + node_id: 5 + local_name: "model_variables" + } + children { + node_id: 6 + local_name: "signatures" + } + children { + node_id: 210 + local_name: "action" + } + children { + node_id: 211 + local_name: "get_initial_state" + } + children { + node_id: 212 + local_name: "get_train_step" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "3" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 
7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "1" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 8 + local_name: "_q_network" + } + children { + node_id: 1 + local_name: "_time_step_spec" + } + children { + node_id: 9 + local_name: "_trajectory_spec" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_INT64 + shape { + } + name: "train_step" + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 213 + local_name: "action" + } + children { + node_id: 214 + local_name: "get_initial_state" + } + children { + node_id: 215 + local_name: "get_train_step" + } + user_object { + identifier: "signature_map" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 16 + local_name: "_input_tensor_spec" + } + children { + node_id: 17 + local_name: "_encoder" + } + children { + node_id: 18 + local_name: "_q_value_layer" + } + children { + node_id: 19 + local_name: "variables" + } + children { + node_id: 20 + local_name: "regularization_losses" + } + children { + node_id: 21 + local_name: "trainable_variables" + } + children { + node_id: 22 + local_name: "keras_api" + } + children { + node_id: 216 + local_name: "__call__" + } + children { + node_id: 217 + local_name: "call_and_return_all_conditional_losses" + } 
+ user_object { + identifier: "_tf_keras_network" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"QNetwork\", \"name\": \"QNetwork\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"layer was saved without config\": true}, \"is_graph_network\": false}" + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "1" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense/bias" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense_1/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 40 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense_1/bias" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + trainable: true + name: "QNetwork/dense_2/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 2 + } + } + trainable: true + name: "QNetwork/dense_2/bias" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 23 + local_name: "_input_tensor_spec" + } + children { + node_id: 24 + local_name: "_preprocessing_nest" + } + children { + node_id: 25 + local_name: "_flat_preprocessing_layers" + } + children { + node_id: 26 + local_name: "_preprocessing_combiner" + } + children { + node_id: 27 + local_name: 
"_postprocessing_layers" + } + children { + node_id: 28 + local_name: "variables" + } + children { + node_id: 29 + local_name: "regularization_losses" + } + children { + node_id: 30 + local_name: "trainable_variables" + } + children { + node_id: 31 + local_name: "keras_api" + } + children { + node_id: 218 + local_name: "__call__" + } + children { + node_id: 219 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_network" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"EncodingNetwork\", \"name\": \"EncodingNetwork\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"layer was saved without config\": true}, \"is_graph_network\": false}" + } + } + nodes { + children { + node_id: 14 + local_name: "kernel" + } + children { + node_id: 15 + local_name: "bias" + } + children { + node_id: 32 + local_name: "variables" + } + children { + node_id: 33 + local_name: "regularization_losses" + } + children { + node_id: 34 + local_name: "trainable_variables" + } + children { + node_id: 35 + local_name: "keras_api" + } + children { + node_id: 220 + local_name: "__call__" + } + children { + node_id: 221 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_2\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_2\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 2, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"RandomUniform\", \"config\": {\"minval\": -0.03, \"maxval\": 0.03, \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Constant\", \"config\": {\"value\": -0.2, \"dtype\": 
\"float32\"}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 40}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 40]}}" + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 36 + local_name: "layer_metrics" + } + children { + node_id: 19 + local_name: "variables" + } + children { + node_id: 37 + local_name: "layer_regularization_losses" + } + children { + node_id: 38 + local_name: "metrics" + } + children { + node_id: 39 + local_name: "layers" + } + children { + node_id: 20 + local_name: "regularization_losses" + } + children { + node_id: 40 + local_name: "non_trainable_variables" + } + children { + node_id: 21 + local_name: "trainable_variables" + } + children { + node_id: 216 + local_name: "__call__" + } + children { + node_id: 217 + local_name: 
"call_and_return_all_conditional_losses" + } + children { + node_id: 217 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 41 + local_name: "0" + } + children { + node_id: 42 + local_name: "1" + } + children { + node_id: 43 + local_name: "2" + } + children { + node_id: 44 + local_name: "3" + } + children { + node_id: 45 + local_name: "4" + } + children { + node_id: 46 + local_name: "5" + } + children { + node_id: 47 + local_name: "6" + } + children { + node_id: 48 + local_name: "7" + } + children { + node_id: 49 + local_name: "8" + } + children { + node_id: 50 + local_name: "9" + } + children { + node_id: 51 + local_name: "10" + } + children { + node_id: 52 + local_name: "11" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 53 + local_name: "variables" + } + children { + node_id: 54 + local_name: "regularization_losses" + } + children { + node_id: 55 + local_name: "trainable_variables" + } + children { + node_id: 56 + local_name: "keras_api" + } + children { + node_id: 222 + local_name: "__call__" + } + children { + node_id: 223 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Concatenate\", \"name\": \"concatenate\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"concatenate\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}, 
\"build_input_shape\": [{\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 1]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}]}" + } + } + nodes { + children { + node_id: 57 + local_name: "0" + } + children { + node_id: 58 + local_name: "1" + } + children { + node_id: 59 + local_name: "2" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 60 + local_name: "layer_metrics" + } + children { + node_id: 28 + local_name: "variables" + } + children { + node_id: 61 + local_name: "layer_regularization_losses" + } + children { + node_id: 62 + local_name: "metrics" + } + children { + node_id: 63 + 
local_name: "layers" + } + children { + node_id: 29 + local_name: "regularization_losses" + } + children { + node_id: 64 + local_name: "non_trainable_variables" + } + children { + node_id: 30 + local_name: "trainable_variables" + } + children { + node_id: 218 + local_name: "__call__" + } + children { + node_id: 219 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 219 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 14 + local_name: "0" + } + children { + node_id: 15 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 14 + local_name: "0" + } + children { + node_id: 15 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 65 + local_name: "layer_metrics" + } + children { + node_id: 32 + local_name: "variables" + } + children { + node_id: 66 + local_name: "layer_regularization_losses" + } + children { + node_id: 67 + local_name: "metrics" + } + children { + node_id: 68 + local_name: "layers" + } + children { + node_id: 33 + local_name: "regularization_losses" + } + children { + node_id: 69 + local_name: "non_trainable_variables" + } + children { + node_id: 34 + local_name: "trainable_variables" + } + children { + node_id: 220 + local_name: "__call__" + } + children { + node_id: 221 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 221 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + 
user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 17 + local_name: "0" + } + children { + node_id: 18 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 70 + local_name: "variables" + } + children { + node_id: 71 + local_name: "regularization_losses" + } + children { + node_id: 72 + local_name: "trainable_variables" + } + children { + node_id: 73 + local_name: "keras_api" + } + children { + node_id: 224 + local_name: "__call__" + } + children { + node_id: 225 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 11.0, 12.0, 13.0, 14.0, 14.0, 14.0, 16.0, 17.0, 19.0, 23.0, 27.0, 39.0]]}]}, \"function_type\": \"lambda\", \"module\": 
\"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 74 + local_name: "variables" + } + children { + node_id: 75 + local_name: "regularization_losses" + } + children { + node_id: 76 + local_name: "trainable_variables" + } + children { + node_id: 77 + local_name: "keras_api" + } + children { + node_id: 226 + local_name: "__call__" + } + children { + node_id: 227 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_1\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_1\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 10.0, 10.0, 10.0, 12.0, 12.0, 12.0, 14.0, 14.0, 18.0, 20.0, 23.0, 30.0, 41.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 78 + local_name: "variables" + } + children { + node_id: 79 + local_name: "regularization_losses" + } + children { + node_id: 80 + local_name: "trainable_variables" + } + children { + node_id: 81 + local_name: "keras_api" + } + children { + node_id: 228 + local_name: "__call__" + } + children { + node_id: 229 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_2\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_2\", \"trainable\": true, \"dtype\": \"float32\", 
\"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 29.0, 29.0, 29.0, 29.0, 30.0, 30.0, 31.0, 31.0, 31.0, 31.0, 32.0, 32.0, 33.0, 33.0, 33.0, 34.0, 34.0, 34.0, 34.0, 35.0, 35.0, 36.0, 36.0, 37.0, 37.0, 37.0, 38.0, 38.0, 39.0, 39.0, 40.0, 40.0, 41.0, 41.0, 41.0, 42.0, 43.0, 43.0, 44.0, 44.0, 45.0, 45.0, 46.0, 46.0, 46.0, 47.0, 47.0, 48.0, 49.0, 49.0, 50.0, 50.0, 51.0, 52.0, 53.0, 53.0, 54.0, 55.0, 56.0, 57.0, 57.0, 58.0, 59.0, 60.0, 61.0, 61.0, 63.0, 63.0, 64.0, 65.0, 66.0, 67.0, 67.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 85.0, 86.0, 88.0, 89.0, 91.0, 92.0, 94.0, 96.0, 97.0, 99.0, 100.0, 101.0, 103.0, 105.0, 107.0, 109.0, 111.0, 113.0, 115.0, 118.0, 121.0, 123.0, 126.0, 128.0, 130.0, 133.0, 135.0, 137.0, 140.0, 143.0, 146.0, 148.0, 151.0, 154.0, 157.0, 161.0, 163.0, 166.0, 169.0, 173.0, 178.0, 183.0, 189.0, 193.0, 197.0, 202.0, 208.0, 213.0, 218.0, 223.0, 228.0, 233.0, 239.0, 245.0, 250.0, 257.0, 
262.0, 269.0, 277.0, 284.0, 292.0, 300.0, 308.0, 319.0, 329.0, 340.0, 349.0, 359.0, 371.0, 382.0, 394.0, 410.0, 423.0, 435.0, 445.0, 462.0, 480.0, 492.0, 506.0, 519.0, 536.0, 557.0, 577.0, 598.0, 622.0, 655.0, 679.0, 707.0, 733.0, 751.0, 787.0, 814.0, 847.0, 897.0, 934.0, 997.0, 1062.0, 1111.0, 1181.0, 1275.0, 1385.0, 1465.0, 1603.0, 1769.0, 2057.0, 2257.0, 2803.0, 3468.0, 4417.0, 6538.0, 16126.0, 23446.0, 33536.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 82 + local_name: "variables" + } + children { + node_id: 83 + local_name: "regularization_losses" + } + children { + node_id: 84 + local_name: "trainable_variables" + } + children { + node_id: 85 + local_name: "keras_api" + } + children { + node_id: 230 + local_name: "__call__" + } + children { + node_id: 231 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_3\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_3\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 
4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 29.0, 29.0, 30.0, 30.0, 30.0, 31.0, 31.0, 31.0, 32.0, 32.0, 32.0, 33.0, 33.0, 33.0, 34.0, 34.0, 34.0, 34.0, 35.0, 35.0, 35.0, 36.0, 36.0, 36.0, 37.0, 37.0, 37.0, 38.0, 38.0, 38.0, 38.0, 39.0, 39.0, 40.0, 40.0, 41.0, 41.0, 42.0, 43.0, 43.0, 44.0, 45.0, 45.0, 46.0, 47.0, 47.0, 48.0, 49.0, 49.0, 50.0, 50.0, 52.0, 52.0, 53.0, 54.0, 55.0, 55.0, 57.0, 58.0, 59.0, 60.0, 62.0, 64.0, 65.0, 66.0, 68.0, 70.0, 70.0, 70.0, 70.0, 70.0, 71.0, 73.0, 75.0, 76.0, 78.0, 81.0, 
84.0, 86.0, 90.0, 94.0, 98.0, 101.0, 106.0, 111.0, 117.0, 123.0, 130.0, 138.0, 146.0, 157.0, 163.0, 176.0, 187.0, 198.0, 214.0, 227.0, 252.0, 280.0, 327.0, 395.0, 506.0, 671.0, 1025.0, 1971.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 86 + local_name: "variables" + } + children { + node_id: 87 + local_name: "regularization_losses" + } + children { + node_id: 88 + local_name: "trainable_variables" + } + children { + node_id: 89 + local_name: "keras_api" + } + children { + node_id: 232 + local_name: "__call__" + } + children { + node_id: 233 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_4\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_4\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 
4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 27.0, 28.0, 28.0, 28.0, 28.0, 28.0, 29.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 31.0, 32.0, 32.0, 32.0, 32.0, 32.0, 34.0, 34.0, 34.0, 34.0, 34.0, 34.0, 35.0, 36.0, 36.0, 36.0, 37.0, 38.0, 38.0, 38.0, 39.0, 40.0, 40.0, 41.0, 42.0, 42.0, 43.0, 44.0, 44.0, 46.0, 46.0, 47.0, 48.0, 48.0, 50.0, 50.0, 52.0, 52.0, 54.0, 55.0, 55.0, 56.0, 57.0, 58.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 62.0, 62.0, 64.0, 65.0, 66.0, 68.0, 70.0, 72.0, 74.0, 77.0, 80.0, 82.0, 86.0, 89.0, 92.0, 
96.0, 99.0, 104.0, 108.0, 114.0, 119.0, 125.0, 131.0, 139.0, 146.0, 157.0, 167.0, 176.0, 188.0, 198.0, 215.0, 236.0, 262.0, 306.0, 376.0, 462.0, 596.0, 942.0, 1428.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 90 + local_name: "variables" + } + children { + node_id: 91 + local_name: "regularization_losses" + } + children { + node_id: 92 + local_name: "trainable_variables" + } + children { + node_id: 93 + local_name: "keras_api" + } + children { + node_id: 234 + local_name: "__call__" + } + children { + node_id: 235 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_5\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_5\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, 
{\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 
3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 11.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 18.0, 20.0, 23.0, 29.0, 38.0, 60.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 94 + local_name: "variables" + } + children { + node_id: 95 + local_name: "regularization_losses" + } + children { + node_id: 96 + local_name: "trainable_variables" + } + children { + node_id: 97 + local_name: "keras_api" + } + children { + node_id: 236 + local_name: "__call__" + } + children { + node_id: 237 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": 
\"lambda_6\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_6\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 
19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 23.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 30.0, 30.0, 30.0, 31.0, 31.0, 32.0, 32.0, 33.0, 33.0, 34.0, 35.0, 37.0, 38.0, 40.0, 46.0, 51.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 98 + local_name: "variables" + } + children { + node_id: 99 + local_name: "regularization_losses" + } + children { + node_id: 100 + local_name: "trainable_variables" + } + children { + node_id: 101 + local_name: "keras_api" + } + children { + node_id: 238 + local_name: "__call__" + } + children { + node_id: 239 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_7\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_7\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [-15035.0, -15030.0, -15025.0, -15000.0, -14985.0, -14945.0, -14745.0, -70.0, -55.0, -55.0, -50.0, -50.0, -50.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, 
-35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, 
-30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 
5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 55.0, 55.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 70.0, 70.0, 70.0, 70.0, 70.0, 70.0, 70.0, 75.0, 75.0, 80.0, 80.0, 80.0, 85.0, 85.0, 85.0, 90.0, 90.0, 90.0, 90.0, 95.0, 95.0, 100.0, 100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 125.0, 130.0, 140.0, 140.0, 145.0, 150.0, 155.0, 160.0, 160.0, 165.0, 170.0, 175.0, 180.0, 190.0, 200.0, 210.0, 215.0, 220.0, 220.0, 230.0, 235.0, 245.0, 250.0, 260.0, 275.0, 290.0, 305.0, 325.0, 350.0, 370.0, 390.0, 425.0, 460.0, 500.0, 560.0, 650.0, 790.0, 1025.0, 1600.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 102 + local_name: "variables" + } + children { + node_id: 103 + local_name: "regularization_losses" + } + children { + node_id: 104 + local_name: "trainable_variables" + } + children { + node_id: 105 + 
local_name: "keras_api" + } + children { + node_id: 240 + local_name: "__call__" + } + children { + node_id: 241 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_8\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_8\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [18.0, 29.0, 39.0, 48.0, 57.0, 64.0, 70.0, 76.0, 82.0, 87.0, 92.0, 97.0, 101.0, 105.0, 109.0, 113.0, 116.0, 120.0, 123.0, 127.0, 130.0, 134.0, 137.0, 140.0, 143.0, 146.0, 149.0, 152.0, 156.0, 159.0, 162.0, 165.0, 168.0, 171.0, 174.0, 177.0, 180.0, 183.0, 186.0, 188.0, 191.0, 194.0, 197.0, 200.0, 203.0, 205.0, 208.0, 211.0, 214.0, 217.0, 219.0, 222.0, 225.0, 228.0, 231.0, 233.0, 236.0, 239.0, 242.0, 244.0, 247.0, 250.0, 253.0, 255.0, 258.0, 261.0, 264.0, 266.0, 269.0, 272.0, 275.0, 
278.0, 280.0, 283.0, 286.0, 289.0, 292.0, 294.0, 297.0, 300.0, 303.0, 305.0, 308.0, 311.0, 314.0, 317.0, 319.0, 322.0, 325.0, 327.0, 330.0, 333.0, 336.0, 339.0, 341.0, 344.0, 347.0, 350.0, 353.0, 355.0, 358.0, 361.0, 364.0, 367.0, 370.0, 373.0, 375.0, 378.0, 381.0, 384.0, 387.0, 390.0, 393.0, 396.0, 399.0, 401.0, 404.0, 407.0, 410.0, 413.0, 416.0, 419.0, 422.0, 425.0, 428.0, 431.0, 434.0, 437.0, 440.0, 443.0, 446.0, 449.0, 452.0, 455.0, 458.0, 461.0, 464.0, 467.0, 470.0, 473.0, 476.0, 479.0, 483.0, 486.0, 489.0, 492.0, 495.0, 498.0, 501.0, 504.0, 507.0, 511.0, 514.0, 517.0, 520.0, 523.0, 526.0, 530.0, 533.0, 536.0, 539.0, 542.0, 545.0, 549.0, 552.0, 555.0, 558.0, 562.0, 565.0, 569.0, 572.0, 575.0, 579.0, 582.0, 585.0, 589.0, 592.0, 595.0, 599.0, 602.0, 605.0, 609.0, 612.0, 616.0, 620.0, 623.0, 626.0, 630.0, 634.0, 637.0, 641.0, 644.0, 648.0, 651.0, 655.0, 658.0, 662.0, 665.0, 669.0, 672.0, 676.0, 680.0, 683.0, 687.0, 691.0, 694.0, 698.0, 702.0, 705.0, 709.0, 712.0, 716.0, 720.0, 724.0, 727.0, 731.0, 735.0, 739.0, 742.0, 746.0, 750.0, 754.0, 758.0, 761.0, 765.0, 769.0, 773.0, 777.0, 780.0, 784.0, 788.0, 792.0, 796.0, 800.0, 804.0, 808.0, 812.0, 816.0, 820.0, 823.0, 828.0, 832.0, 836.0, 840.0, 844.0, 848.0, 852.0, 856.0, 860.0, 864.0, 868.0, 873.0, 877.0, 881.0, 885.0, 889.0, 893.0, 897.0, 902.0, 906.0, 910.0, 914.0, 919.0, 923.0, 927.0, 931.0, 935.0, 940.0, 944.0, 948.0, 953.0, 957.0, 962.0, 966.0, 970.0, 975.0, 979.0, 984.0, 988.0, 993.0, 997.0, 1002.0, 1006.0, 1011.0, 1015.0, 1020.0, 1024.0, 1029.0, 1034.0, 1038.0, 1043.0, 1047.0, 1052.0, 1057.0, 1062.0, 1066.0, 1071.0, 1076.0, 1081.0, 1086.0, 1090.0, 1095.0, 1100.0, 1105.0, 1110.0, 1114.0, 1119.0, 1124.0, 1129.0, 1134.0, 1139.0, 1144.0, 1149.0, 1154.0, 1159.0, 1164.0, 1169.0, 1174.0, 1179.0, 1184.0, 1189.0, 1194.0, 1199.0, 1204.0, 1209.0, 1215.0, 1220.0, 1225.0, 1230.0, 1235.0, 1241.0, 1246.0, 1251.0, 1257.0, 1262.0, 1267.0, 1273.0, 1278.0, 1284.0, 1289.0, 1294.0, 1300.0, 1305.0, 1311.0, 1316.0, 1322.0, 1327.0, 
1333.0, 1338.0, 1344.0, 1350.0, 1355.0, 1361.0, 1367.0, 1372.0, 1378.0, 1383.0, 1389.0, 1395.0, 1401.0, 1407.0, 1413.0, 1418.0, 1424.0, 1430.0, 1436.0, 1442.0, 1448.0, 1454.0, 1459.0, 1465.0, 1472.0, 1477.0, 1483.0, 1489.0, 1495.0, 1501.0, 1507.0, 1514.0, 1520.0, 1526.0, 1532.0, 1538.0, 1545.0, 1551.0, 1557.0, 1564.0, 1570.0, 1576.0, 1583.0, 1589.0, 1596.0, 1602.0, 1608.0, 1615.0, 1621.0, 1628.0, 1634.0, 1641.0, 1647.0, 1654.0, 1661.0, 1667.0, 1674.0, 1681.0, 1687.0, 1694.0, 1701.0, 1708.0, 1715.0, 1722.0, 1729.0, 1735.0, 1742.0, 1749.0, 1756.0, 1763.0, 1770.0, 1777.0, 1784.0, 1791.0, 1798.0, 1806.0, 1812.0, 1820.0, 1827.0, 1835.0, 1841.0, 1849.0, 1856.0, 1863.0, 1871.0, 1878.0, 1885.0, 1893.0, 1901.0, 1908.0, 1915.0, 1923.0, 1930.0, 1938.0, 1946.0, 1953.0, 1961.0, 1969.0, 1976.0, 1984.0, 1992.0, 2000.0, 2007.0, 2015.0, 2023.0, 2031.0, 2039.0, 2047.0, 2055.0, 2063.0, 2071.0, 2079.0, 2087.0, 2095.0, 2104.0, 2112.0, 2120.0, 2128.0, 2137.0, 2146.0, 2154.0, 2162.0, 2171.0, 2179.0, 2188.0, 2197.0, 2205.0, 2214.0, 2223.0, 2232.0, 2241.0, 2250.0, 2258.0, 2268.0, 2277.0, 2285.0, 2294.0, 2304.0, 2313.0, 2322.0, 2331.0, 2340.0, 2350.0, 2359.0, 2368.0, 2378.0, 2388.0, 2397.0, 2407.0, 2416.0, 2426.0, 2436.0, 2446.0, 2455.0, 2465.0, 2475.0, 2485.0, 2495.0, 2505.0, 2515.0, 2525.0, 2535.0, 2545.0, 2556.0, 2566.0, 2577.0, 2587.0, 2598.0, 2609.0, 2620.0, 2631.0, 2641.0, 2652.0, 2663.0, 2674.0, 2685.0, 2696.0, 2708.0, 2719.0, 2730.0, 2742.0, 2753.0, 2764.0, 2776.0, 2788.0, 2799.0, 2811.0, 2823.0, 2835.0, 2847.0, 2858.0, 2870.0, 2882.0, 2894.0, 2906.0, 2919.0, 2931.0, 2943.0, 2956.0, 2968.0, 2981.0, 2994.0, 3006.0, 3019.0, 3032.0, 3045.0, 3058.0, 3070.0, 3083.0, 3096.0, 3109.0, 3121.0, 3134.0, 3148.0, 3161.0, 3174.0, 3187.0, 3200.0, 3214.0, 3228.0, 3242.0, 3255.0, 3268.0, 3283.0, 3297.0, 3310.0, 3325.0, 3340.0, 3353.0, 3368.0, 3383.0, 3398.0, 3412.0, 3427.0, 3442.0, 3457.0, 3471.0, 3487.0, 3502.0, 3516.0, 3531.0, 3546.0, 3561.0, 3577.0, 3593.0, 3608.0, 3625.0, 3641.0, 3657.0, 3673.0, 
3690.0, 3706.0, 3722.0, 3738.0, 3755.0, 3772.0, 3789.0, 3805.0, 3823.0, 3839.0, 3856.0, 3873.0, 3891.0, 3908.0, 3926.0, 3944.0, 3960.0, 3977.0, 3995.0, 4013.0, 4031.0, 4048.0, 4067.0, 4085.0, 4104.0, 4122.0, 4140.0, 4159.0, 4177.0, 4196.0, 4215.0, 4234.0, 4253.0, 4272.0, 4291.0, 4311.0, 4332.0, 4351.0, 4371.0, 4391.0, 4412.0, 4433.0, 4454.0, 4474.0, 4496.0, 4518.0, 4538.0, 4558.0, 4579.0, 4601.0, 4619.0, 4640.0, 4662.0, 4684.0, 4706.0, 4728.0, 4751.0, 4771.0, 4794.0, 4818.0, 4840.0, 4863.0, 4887.0, 4910.0, 4933.0, 4956.0, 4980.0, 5004.0, 5028.0, 5052.0, 5076.0, 5100.0, 5125.0, 5152.0, 5175.0, 5200.0, 5226.0, 5251.0, 5278.0, 5304.0, 5329.0, 5354.0, 5381.0, 5407.0, 5433.0, 5460.0, 5488.0, 5516.0, 5544.0, 5573.0, 5600.0, 5628.0, 5656.0, 5684.0, 5713.0, 5741.0, 5771.0, 5799.0, 5830.0, 5860.0, 5891.0, 5921.0, 5951.0, 5980.0, 6010.0, 6041.0, 6073.0, 6105.0, 6133.0, 6163.0, 6195.0, 6227.0, 6258.0, 6291.0, 6322.0, 6356.0, 6390.0, 6424.0, 6457.0, 6491.0, 6527.0, 6561.0, 6596.0, 6631.0, 6665.0, 6701.0, 6736.0, 6771.0, 6805.0, 6840.0, 6877.0, 6911.0, 6947.0, 6985.0, 7022.0, 7059.0, 7097.0, 7135.0, 7174.0, 7212.0, 7251.0, 7289.0, 7327.0, 7366.0, 7406.0, 7447.0, 7486.0, 7525.0, 7566.0, 7606.0, 7646.0, 7688.0, 7728.0, 7771.0, 7814.0, 7859.0, 7901.0, 7949.0, 7992.0, 8036.0, 8082.0, 8127.0, 8173.0, 8218.0, 8262.0, 8309.0, 8353.0, 8397.0, 8444.0, 8489.0, 8539.0, 8585.0, 8632.0, 8682.0, 8727.0, 8777.0, 8828.0, 8879.0, 8929.0, 8982.0, 9037.0, 9087.0, 9140.0, 9193.0, 9250.0, 9305.0, 9361.0, 9418.0, 9475.0, 9532.0, 9589.0, 9644.0, 9699.0, 9758.0, 9818.0, 9875.0, 9935.0, 9997.0, 10057.0, 10117.0, 10174.0, 10232.0, 10296.0, 10356.0, 10419.0, 10482.0, 10546.0, 10608.0, 10670.0, 10729.0, 10790.0, 10855.0, 10920.0, 10990.0, 11054.0, 11118.0, 11181.0, 11248.0, 11316.0, 11385.0, 11454.0, 11526.0, 11597.0, 11667.0, 11740.0, 11820.0, 11897.0, 11973.0, 12046.0, 12126.0, 12204.0, 12287.0, 12370.0, 12456.0, 12538.0, 12627.0, 12714.0, 12799.0, 12883.0, 12971.0, 13062.0, 13154.0, 13233.0, 13328.0, 
13418.0, 13511.0, 13607.0, 13709.0, 13806.0, 13903.0, 14002.0, 14104.0, 14200.0, 14288.0, 14391.0, 14488.0, 14590.0, 14698.0, 14808.0, 14910.0, 15020.0, 15126.0, 15238.0, 15347.0, 15456.0, 15574.0, 15692.0, 15786.0, 15896.0, 16016.0, 16136.0, 16250.0, 16352.0, 16474.0, 16575.0, 16702.0, 16835.0, 16965.0, 17096.0, 17232.0, 17370.0, 17443.0, 17581.0, 17719.0, 17864.0, 17976.0, 18116.0, 18250.0, 18396.0, 18540.0, 18690.0, 18840.0, 18989.0, 19136.0, 19294.0, 19445.0, 19589.0, 19750.0, 19905.0, 20064.0, 20191.0, 20325.0, 20497.0, 20662.0, 20833.0, 20981.0, 21152.0, 21334.0, 21510.0, 21642.0, 21821.0, 22001.0, 22186.0, 22379.0, 22568.0, 22770.0, 22958.0, 23162.0, 23360.0, 23524.0, 23737.0, 23960.0, 24175.0, 24395.0, 24631.0, 24865.0, 25091.0, 25327.0, 25580.0, 25833.0, 26089.0, 26361.0, 26636.0, 26889.0, 27155.0, 27436.0, 27715.0, 28003.0, 28303.0, 28600.0, 28916.0, 29223.0, 29553.0, 29884.0, 30200.0, 30538.0, 30868.0, 31211.0, 31548.0, 31881.0, 32253.0, 32605.0, 32980.0, 33385.0, 33805.0, 34254.0, 34723.0, 35167.0, 35666.0, 36125.0, 36652.0, 37177.0, 37739.0, 38321.0, 38932.0, 39640.0, 40337.0, 41000.0, 41626.0, 42385.0, 43122.0, 43890.0, 44687.0, 45609.0, 46520.0, 47489.0, 48432.0, 49458.0, 50511.0, 51561.0, 52568.0, 53676.0, 54936.0, 56071.0, 57302.0, 58513.0, 59800.0, 61192.0, 62702.0, 64205.0, 65868.0, 67780.0, 69960.0, 72330.0, 74918.0, 77540.0, 80344.0, 83727.0, 87662.0, 93589.0, 101441.0, 110544.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 106 + local_name: "variables" + } + children { + node_id: 107 + local_name: "regularization_losses" + } + children { + node_id: 108 + local_name: "trainable_variables" + } + children { + node_id: 109 + local_name: "keras_api" + } + children { + node_id: 242 + local_name: "__call__" + 
} + children { + node_id: 243 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_9\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_9\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAgAAAAQAAAATAAAAcxgAAACIAHwAgwF9AXQAagF8AXQAagJkAY0CUwApAk4pAdoF\\nZHR5cGUpA9oCdGbaCnplcm9zX2xpa2XaB2Zsb2F0MzIpAtoDb2Jz2gxleHBhbmRlZF9vYnMpAdoO\\nZXhwYW5kX2RpbXNfb3CpAPr0L2V4cG9ydC9oZGEzL2JvcmdsZXQvbG9jYWxfcmFtX2ZzX2RpcnMv\\nMC55dW5kaV9tdXBwZXRfMF8xMjI3MDgzMy4xMy55dW5kaS4xOTQ3MzE0MTc5NjEuOGY0ZjlmOThj\\nYjdhMzA1NS9idWlsZF90YXJnZXRfdHJhaW5fcGFyX2Q5NzU3NTM3MDE2YTJlYjgvdHJhaW4ucGFy\\nL2dvb2dsZTMvbGVhcm5pbmcvc21hcnRjaG9pY2VzL3Jlc2VhcmNoL2NsaWVudHMvY29tcGlsZXJf\\nb3B0L3BvbGljeV90cmFpbmluZy9mZWF0dXJlX29wcy5wedoPZGlzY2FyZF9mZWF0dXJlJwAAAHME\\nAAAAAAEIAQ==\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 110 + local_name: "variables" + } + children { + node_id: 111 + local_name: "regularization_losses" + } + children { + node_id: 112 + local_name: "trainable_variables" + } + children { + node_id: 113 + local_name: "keras_api" + } + children { + node_id: 244 + local_name: "__call__" + } + children { + node_id: 245 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { 
+ producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_10\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_10\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [13.0, 38.0, 56.0, 70.0, 82.0, 94.0, 104.0, 114.0, 123.0, 131.0, 139.0, 148.0, 152.0, 153.0, 158.0, 163.0, 170.0, 174.0, 178.0, 180.0, 183.0, 186.0, 188.0, 190.0, 192.0, 196.0, 198.0, 201.0, 205.0, 208.0, 212.0, 215.0, 219.0, 221.0, 225.0, 227.0, 229.0, 232.0, 233.0, 236.0, 239.0, 242.0, 245.0, 248.0, 250.0, 252.0, 254.0, 256.0, 259.0, 261.0, 264.0, 267.0, 270.0, 272.0, 275.0, 278.0, 280.0, 283.0, 285.0, 287.0, 290.0, 293.0, 295.0, 297.0, 300.0, 303.0, 305.0, 308.0, 311.0, 313.0, 316.0, 319.0, 322.0, 325.0, 329.0, 331.0, 333.0, 336.0, 338.0, 340.0, 343.0, 345.0, 347.0, 347.0, 349.0, 351.0, 353.0, 355.0, 357.0, 359.0, 361.0, 363.0, 365.0, 368.0, 369.0, 371.0, 373.0, 375.0, 377.0, 380.0, 382.0, 385.0, 387.0, 
389.0, 391.0, 394.0, 396.0, 398.0, 400.0, 403.0, 405.0, 408.0, 410.0, 412.0, 415.0, 417.0, 420.0, 422.0, 425.0, 427.0, 429.0, 432.0, 434.0, 437.0, 439.0, 442.0, 444.0, 446.0, 449.0, 451.0, 454.0, 456.0, 458.0, 461.0, 463.0, 466.0, 469.0, 472.0, 474.0, 476.0, 479.0, 482.0, 483.0, 486.0, 489.0, 492.0, 495.0, 498.0, 500.0, 503.0, 505.0, 508.0, 510.0, 513.0, 516.0, 519.0, 522.0, 524.0, 528.0, 530.0, 533.0, 536.0, 539.0, 541.0, 544.0, 547.0, 550.0, 553.0, 556.0, 559.0, 561.0, 563.0, 567.0, 570.0, 572.0, 575.0, 577.0, 580.0, 584.0, 586.0, 589.0, 592.0, 595.0, 598.0, 601.0, 605.0, 607.0, 611.0, 613.0, 617.0, 620.0, 623.0, 626.0, 629.0, 632.0, 635.0, 639.0, 642.0, 645.0, 648.0, 651.0, 654.0, 657.0, 660.0, 662.0, 666.0, 669.0, 672.0, 676.0, 679.0, 682.0, 685.0, 688.0, 690.0, 693.0, 696.0, 699.0, 702.0, 705.0, 709.0, 712.0, 714.0, 718.0, 721.0, 724.0, 726.0, 728.0, 729.0, 731.0, 734.0, 737.0, 741.0, 745.0, 748.0, 750.0, 753.0, 756.0, 760.0, 763.0, 766.0, 770.0, 773.0, 776.0, 779.0, 782.0, 786.0, 788.0, 793.0, 796.0, 798.0, 802.0, 805.0, 808.0, 811.0, 815.0, 818.0, 820.0, 824.0, 827.0, 829.0, 832.0, 835.0, 838.0, 842.0, 846.0, 849.0, 854.0, 857.0, 860.0, 864.0, 867.0, 871.0, 875.0, 879.0, 882.0, 887.0, 890.0, 893.0, 897.0, 901.0, 905.0, 908.0, 911.0, 915.0, 918.0, 921.0, 925.0, 929.0, 932.0, 934.0, 937.0, 940.0, 943.0, 946.0, 950.0, 953.0, 956.0, 961.0, 965.0, 969.0, 973.0, 976.0, 980.0, 982.0, 985.0, 990.0, 994.0, 997.0, 1001.0, 1005.0, 1007.0, 1010.0, 1014.0, 1018.0, 1022.0, 1025.0, 1028.0, 1033.0, 1035.0, 1038.0, 1042.0, 1047.0, 1052.0, 1056.0, 1060.0, 1063.0, 1067.0, 1071.0, 1075.0, 1079.0, 1083.0, 1086.0, 1088.0, 1092.0, 1097.0, 1102.0, 1106.0, 1109.0, 1113.0, 1117.0, 1120.0, 1125.0, 1129.0, 1134.0, 1137.0, 1142.0, 1146.0, 1150.0, 1151.0, 1155.0, 1159.0, 1162.0, 1166.0, 1170.0, 1174.0, 1177.0, 1181.0, 1185.0, 1188.0, 1193.0, 1196.0, 1203.0, 1207.0, 1212.0, 1214.0, 1217.0, 1220.0, 1222.0, 1222.0, 1226.0, 1229.0, 1233.0, 1237.0, 1241.0, 1246.0, 1250.0, 1253.0, 1257.0, 
1262.0, 1267.0, 1272.0, 1278.0, 1283.0, 1287.0, 1293.0, 1297.0, 1301.0, 1304.0, 1309.0, 1315.0, 1320.0, 1325.0, 1329.0, 1333.0, 1336.0, 1341.0, 1344.0, 1348.0, 1351.0, 1357.0, 1363.0, 1368.0, 1374.0, 1379.0, 1383.0, 1386.0, 1391.0, 1395.0, 1399.0, 1403.0, 1407.0, 1410.0, 1415.0, 1418.0, 1423.0, 1428.0, 1432.0, 1436.0, 1438.0, 1442.0, 1446.0, 1450.0, 1454.0, 1462.0, 1467.0, 1472.0, 1477.0, 1483.0, 1488.0, 1492.0, 1496.0, 1503.0, 1508.0, 1513.0, 1518.0, 1520.0, 1526.0, 1531.0, 1534.0, 1538.0, 1542.0, 1546.0, 1552.0, 1558.0, 1564.0, 1568.0, 1573.0, 1578.0, 1581.0, 1590.0, 1596.0, 1601.0, 1606.0, 1611.0, 1616.0, 1622.0, 1629.0, 1634.0, 1640.0, 1647.0, 1651.0, 1657.0, 1660.0, 1665.0, 1672.0, 1678.0, 1686.0, 1692.0, 1698.0, 1704.0, 1709.0, 1714.0, 1719.0, 1724.0, 1730.0, 1737.0, 1744.0, 1751.0, 1755.0, 1761.0, 1764.0, 1772.0, 1778.0, 1784.0, 1789.0, 1799.0, 1804.0, 1811.0, 1819.0, 1825.0, 1830.0, 1838.0, 1849.0, 1858.0, 1862.0, 1868.0, 1872.0, 1878.0, 1885.0, 1888.0, 1892.0, 1897.0, 1902.0, 1907.0, 1919.0, 1926.0, 1932.0, 1936.0, 1941.0, 1946.0, 1952.0, 1960.0, 1968.0, 1977.0, 1985.0, 1992.0, 1997.0, 2006.0, 2012.0, 2018.0, 2026.0, 2034.0, 2044.0, 2050.0, 2057.0, 2064.0, 2069.0, 2075.0, 2082.0, 2091.0, 2098.0, 2107.0, 2122.0, 2126.0, 2135.0, 2146.0, 2149.0, 2157.0, 2163.0, 2172.0, 2178.0, 2184.0, 2191.0, 2198.0, 2208.0, 2216.0, 2223.0, 2235.0, 2242.0, 2252.0, 2263.0, 2272.0, 2277.0, 2288.0, 2296.0, 2306.0, 2311.0, 2318.0, 2323.0, 2334.0, 2341.0, 2356.0, 2366.0, 2373.0, 2379.0, 2386.0, 2407.0, 2416.0, 2423.0, 2432.0, 2438.0, 2448.0, 2453.0, 2464.0, 2473.0, 2473.0, 2481.0, 2492.0, 2504.0, 2511.0, 2523.0, 2529.0, 2537.0, 2545.0, 2556.0, 2566.0, 2575.0, 2584.0, 2592.0, 2602.0, 2613.0, 2624.0, 2636.0, 2643.0, 2647.0, 2652.0, 2664.0, 2675.0, 2688.0, 2693.0, 2702.0, 2709.0, 2722.0, 2739.0, 2754.0, 2766.0, 2776.0, 2786.0, 2799.0, 2810.0, 2832.0, 2840.0, 2849.0, 2860.0, 2873.0, 2889.0, 2908.0, 2914.0, 2926.0, 2939.0, 2950.0, 2961.0, 2969.0, 2978.0, 2990.0, 2999.0, 3023.0, 3032.0, 
3049.0, 3066.0, 3085.0, 3101.0, 3107.0, 3117.0, 3129.0, 3144.0, 3167.0, 3190.0, 3212.0, 3229.0, 3238.0, 3264.0, 3293.0, 3302.0, 3309.0, 3314.0, 3323.0, 3344.0, 3352.0, 3362.0, 3390.0, 3400.0, 3411.0, 3435.0, 3456.0, 3470.0, 3485.0, 3498.0, 3505.0, 3519.0, 3539.0, 3545.0, 3545.0, 3560.0, 3576.0, 3597.0, 3607.0, 3621.0, 3641.0, 3665.0, 3679.0, 3701.0, 3714.0, 3733.0, 3741.0, 3745.0, 3757.0, 3773.0, 3787.0, 3795.0, 3805.0, 3822.0, 3835.0, 3844.0, 3861.0, 3872.0, 3878.0, 3897.0, 3919.0, 3941.0, 3971.0, 4004.0, 4014.0, 4019.0, 4061.0, 4068.0, 4089.0, 4108.0, 4117.0, 4125.0, 4146.0, 4165.0, 4194.0, 4204.0, 4224.0, 4236.0, 4263.0, 4290.0, 4301.0, 4319.0, 4326.0, 4347.0, 4369.0, 4386.0, 4413.0, 4435.0, 4451.0, 4451.0, 4451.0, 4476.0, 4500.0, 4539.0, 4579.0, 4592.0, 4600.0, 4622.0, 4650.0, 4683.0, 4714.0, 4742.0, 4755.0, 4771.0, 4788.0, 4816.0, 4828.0, 4831.0, 4831.0, 4831.0, 4843.0, 4852.0, 4865.0, 4896.0, 4915.0, 4931.0, 4952.0, 4965.0, 4983.0, 5007.0, 5043.0, 5061.0, 5081.0, 5095.0, 5122.0, 5143.0, 5171.0, 5204.0, 5226.0, 5233.0, 5250.0, 5281.0, 5320.0, 5323.0, 5328.0, 5345.0, 5374.0, 5413.0, 5466.0, 5492.0, 5524.0, 5555.0, 5567.0, 5610.0, 5676.0, 5701.0, 5716.0, 5744.0, 5768.0, 5795.0, 5818.0, 5854.0, 5906.0, 5934.0, 5960.0, 5975.0, 5993.0, 6025.0, 6034.0, 6051.0, 6082.0, 6106.0, 6125.0, 6159.0, 6187.0, 6242.0, 6287.0, 6311.0, 6332.0, 6348.0, 6358.0, 6368.0, 6377.0, 6402.0, 6407.0, 6428.0, 6450.0, 6475.0, 6498.0, 6505.0, 6533.0, 6565.0, 6580.0, 6595.0, 6611.0, 6654.0, 6658.0, 6705.0, 6751.0, 6786.0, 6828.0, 6876.0, 6896.0, 6948.0, 6964.0, 7065.0, 7082.0, 7118.0, 7184.0, 7214.0, 7271.0, 7310.0, 7357.0, 7405.0, 7506.0, 7613.0, 7641.0, 7675.0, 7720.0, 7781.0, 7833.0, 7860.0, 7898.0, 7929.0, 8044.0, 8104.0, 8148.0, 8236.0, 8273.0, 8313.0, 8349.0, 8381.0, 8409.0, 8498.0, 8507.0, 8524.0, 8570.0, 8607.0, 8630.0, 8637.0, 8675.0, 8700.0, 8714.0, 8734.0, 8776.0, 8836.0, 8854.0, 8867.0, 8868.0, 9065.0, 9113.0, 9121.0, 9241.0, 9357.0, 9360.0, 9585.0, 9613.0, 9684.0, 9727.0, 9751.0, 
9777.0, 9802.0, 9889.0, 9903.0, 9914.0, 9978.0, 10061.0, 10192.0, 10213.0, 10345.0, 10369.0, 10404.0, 10430.0, 10471.0, 10481.0, 10489.0, 10492.0, 10494.0, 10524.0, 10554.0, 10557.0, 10560.0, 10562.0, 10641.0, 10716.0, 10842.0, 10897.0, 10967.0, 11053.0, 11128.0, 11137.0, 11328.0, 11336.0, 11401.0, 11532.0, 11573.0, 11860.0, 11880.0, 12013.0, 12305.0, 12358.0, 12386.0, 12404.0, 12456.0, 12456.0, 12476.0, 12615.0, 12677.0, 12981.0, 13094.0, 13197.0, 13708.0, 13717.0, 13788.0, 14049.0, 14112.0, 14224.0, 14257.0, 14681.0, 14901.0, 15006.0, 15071.0, 15100.0, 15248.0, 15669.0, 15877.0, 15953.0, 15953.0, 16066.0, 16072.0, 16271.0, 16292.0, 16386.0, 16490.0, 16633.0, 16670.0, 16834.0, 16896.0, 17543.0, 17693.0, 17800.0, 17859.0, 18397.0, 18811.0, 18826.0, 18971.0, 19304.0, 19319.0, 19695.0, 20378.0, 20865.0, 21313.0, 21330.0, 22321.0, 22760.0, 22770.0, 23783.0, 23785.0, 24525.0, 24844.0, 24848.0, 24964.0, 24966.0, 27468.0, 27478.0, 27555.0, 27555.0, 28215.0, 28219.0, 28336.0, 28490.0, 30213.0, 30228.0, 30242.0, 34116.0, 43518.0, 43518.0, 43518.0, 43852.0, 43852.0, 43852.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 114 + local_name: "variables" + } + children { + node_id: 115 + local_name: "regularization_losses" + } + children { + node_id: 116 + local_name: "trainable_variables" + } + children { + node_id: 117 + local_name: "keras_api" + } + children { + node_id: 246 + local_name: "__call__" + } + children { + node_id: 247 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_11\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", 
\"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_11\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 118 + local_name: "layer_metrics" + } + children { + node_id: 53 + local_name: "variables" + } + children { + node_id: 119 + local_name: "layer_regularization_losses" + } + children { + node_id: 120 + local_name: "metrics" + } + children { + node_id: 121 + local_name: "layers" + } + children { + node_id: 54 + local_name: "regularization_losses" + } + children { + node_id: 122 + local_name: "non_trainable_variables" + } + children { + node_id: 55 + local_name: "trainable_variables" + } + children { + node_id: 222 + local_name: "__call__" + } + children { + node_id: 223 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 223 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 123 + local_name: "variables" + } + children { + node_id: 124 + local_name: "regularization_losses" + } + children { + node_id: 125 + local_name: "trainable_variables" + } + children { + node_id: 126 + local_name: "keras_api" + } + children { + node_id: 248 + local_name: "__call__" + } + children { + node_id: 249 + local_name: "call_and_return_all_conditional_losses" + } + user_object { 
+ identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Flatten\", \"name\": \"flatten\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"flatten\", \"trainable\": true, \"dtype\": \"float32\", \"data_format\": \"channels_last\"}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 1, \"axes\": {}}}}" + } + } + nodes { + children { + node_id: 10 + local_name: "kernel" + } + children { + node_id: 11 + local_name: "bias" + } + children { + node_id: 127 + local_name: "variables" + } + children { + node_id: 128 + local_name: "regularization_losses" + } + children { + node_id: 129 + local_name: "trainable_variables" + } + children { + node_id: 130 + local_name: "keras_api" + } + children { + node_id: 250 + local_name: "__call__" + } + children { + node_id: 251 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 2.0, \"mode\": \"fan_in\", \"distribution\": \"truncated_normal\", \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, 
\"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 34}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 34]}}" + } + } + nodes { + children { + node_id: 12 + local_name: "kernel" + } + children { + node_id: 13 + local_name: "bias" + } + children { + node_id: 131 + local_name: "variables" + } + children { + node_id: 132 + local_name: "regularization_losses" + } + children { + node_id: 133 + local_name: "trainable_variables" + } + children { + node_id: 134 + local_name: "keras_api" + } + children { + node_id: 252 + local_name: "__call__" + } + children { + node_id: 253 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_1\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 40, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 2.0, \"mode\": \"fan_in\", \"distribution\": \"truncated_normal\", \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 100}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 100]}}" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + 
producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 41 + local_name: "0" + } + children { + node_id: 42 + local_name: "1" + } + children { + node_id: 43 + local_name: "2" + } + children { + node_id: 44 + local_name: "3" + } + children { + node_id: 45 + local_name: "4" + } + children { + node_id: 46 + local_name: "5" + } + children { + node_id: 47 + local_name: "6" + } + children { + node_id: 48 + local_name: "7" + } + children { + node_id: 49 + local_name: "8" + } + children { + node_id: 50 + local_name: "9" + } + children { + node_id: 51 + local_name: "10" + } + children { + node_id: 52 + local_name: "11" + } + children { + node_id: 26 + local_name: "12" + } + children { + node_id: 57 + local_name: "13" + } + children { + node_id: 58 + local_name: "14" + } + children { + node_id: 59 + local_name: "15" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + 
producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 135 + local_name: "layer_metrics" + } + children { + node_id: 70 + local_name: "variables" + } + children { + node_id: 136 + local_name: "layer_regularization_losses" + } + children { + node_id: 137 + local_name: "metrics" + } + children { + node_id: 138 + local_name: "layers" + } + children { + node_id: 71 + local_name: "regularization_losses" + } + children { + node_id: 139 + local_name: "non_trainable_variables" + } + children { + node_id: 72 + local_name: "trainable_variables" + } + children { + node_id: 224 + local_name: "__call__" + } + children { + node_id: 225 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 225 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 140 + local_name: "layer_metrics" + } + children { + node_id: 74 + local_name: "variables" + } + children { + node_id: 141 + local_name: "layer_regularization_losses" + } + children { + node_id: 142 + local_name: "metrics" + } + children { + node_id: 143 + local_name: "layers" + } + children { + node_id: 75 + local_name: "regularization_losses" + } + children { + node_id: 144 + local_name: "non_trainable_variables" + } + children { + node_id: 76 + local_name: "trainable_variables" + } + children { + node_id: 226 + local_name: "__call__" + } + children { + node_id: 227 + 
local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 227 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 145 + local_name: "layer_metrics" + } + children { + node_id: 78 + local_name: "variables" + } + children { + node_id: 146 + local_name: "layer_regularization_losses" + } + children { + node_id: 147 + local_name: "metrics" + } + children { + node_id: 148 + local_name: "layers" + } + children { + node_id: 79 + local_name: "regularization_losses" + } + children { + node_id: 149 + local_name: "non_trainable_variables" + } + children { + node_id: 80 + local_name: "trainable_variables" + } + children { + node_id: 228 + local_name: "__call__" + } + children { + node_id: 229 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 229 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 150 + local_name: "layer_metrics" + } + children { + node_id: 82 + local_name: "variables" + } + children { + node_id: 151 + 
local_name: "layer_regularization_losses" + } + children { + node_id: 152 + local_name: "metrics" + } + children { + node_id: 153 + local_name: "layers" + } + children { + node_id: 83 + local_name: "regularization_losses" + } + children { + node_id: 154 + local_name: "non_trainable_variables" + } + children { + node_id: 84 + local_name: "trainable_variables" + } + children { + node_id: 230 + local_name: "__call__" + } + children { + node_id: 231 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 231 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 155 + local_name: "layer_metrics" + } + children { + node_id: 86 + local_name: "variables" + } + children { + node_id: 156 + local_name: "layer_regularization_losses" + } + children { + node_id: 157 + local_name: "metrics" + } + children { + node_id: 158 + local_name: "layers" + } + children { + node_id: 87 + local_name: "regularization_losses" + } + children { + node_id: 159 + local_name: "non_trainable_variables" + } + children { + node_id: 88 + local_name: "trainable_variables" + } + children { + node_id: 232 + local_name: "__call__" + } + children { + node_id: 233 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 233 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version 
{ + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 160 + local_name: "layer_metrics" + } + children { + node_id: 90 + local_name: "variables" + } + children { + node_id: 161 + local_name: "layer_regularization_losses" + } + children { + node_id: 162 + local_name: "metrics" + } + children { + node_id: 163 + local_name: "layers" + } + children { + node_id: 91 + local_name: "regularization_losses" + } + children { + node_id: 164 + local_name: "non_trainable_variables" + } + children { + node_id: 92 + local_name: "trainable_variables" + } + children { + node_id: 234 + local_name: "__call__" + } + children { + node_id: 235 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 235 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 165 + local_name: "layer_metrics" + } + children { + node_id: 94 + local_name: "variables" + } + children { + node_id: 166 + local_name: "layer_regularization_losses" + } + children { + node_id: 167 + local_name: "metrics" + } + children { + node_id: 168 + local_name: "layers" + } + children { + node_id: 95 + local_name: "regularization_losses" + } + children { + node_id: 169 + local_name: "non_trainable_variables" + } + children { + node_id: 96 + 
local_name: "trainable_variables" + } + children { + node_id: 236 + local_name: "__call__" + } + children { + node_id: 237 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 237 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 170 + local_name: "layer_metrics" + } + children { + node_id: 98 + local_name: "variables" + } + children { + node_id: 171 + local_name: "layer_regularization_losses" + } + children { + node_id: 172 + local_name: "metrics" + } + children { + node_id: 173 + local_name: "layers" + } + children { + node_id: 99 + local_name: "regularization_losses" + } + children { + node_id: 174 + local_name: "non_trainable_variables" + } + children { + node_id: 100 + local_name: "trainable_variables" + } + children { + node_id: 238 + local_name: "__call__" + } + children { + node_id: 239 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 239 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 175 
+ local_name: "layer_metrics" + } + children { + node_id: 102 + local_name: "variables" + } + children { + node_id: 176 + local_name: "layer_regularization_losses" + } + children { + node_id: 177 + local_name: "metrics" + } + children { + node_id: 178 + local_name: "layers" + } + children { + node_id: 103 + local_name: "regularization_losses" + } + children { + node_id: 179 + local_name: "non_trainable_variables" + } + children { + node_id: 104 + local_name: "trainable_variables" + } + children { + node_id: 240 + local_name: "__call__" + } + children { + node_id: 241 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 241 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 180 + local_name: "layer_metrics" + } + children { + node_id: 106 + local_name: "variables" + } + children { + node_id: 181 + local_name: "layer_regularization_losses" + } + children { + node_id: 182 + local_name: "metrics" + } + children { + node_id: 183 + local_name: "layers" + } + children { + node_id: 107 + local_name: "regularization_losses" + } + children { + node_id: 184 + local_name: "non_trainable_variables" + } + children { + node_id: 108 + local_name: "trainable_variables" + } + children { + node_id: 242 + local_name: "__call__" + } + children { + node_id: 243 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 243 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 185 + local_name: "layer_metrics" + } + children { + node_id: 110 + local_name: "variables" + } + children { + node_id: 186 + local_name: "layer_regularization_losses" + } + children { + node_id: 187 + local_name: "metrics" + } + children { + node_id: 188 + local_name: "layers" + } + children { + node_id: 111 + local_name: "regularization_losses" + } + children { + node_id: 189 + local_name: "non_trainable_variables" + } + children { + node_id: 112 + local_name: "trainable_variables" + } + children { + node_id: 244 + local_name: "__call__" + } + children { + node_id: 245 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 245 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 190 + local_name: "layer_metrics" + } + children { + node_id: 114 + local_name: "variables" + } + children { + node_id: 191 + local_name: "layer_regularization_losses" + } + children { + node_id: 192 + local_name: "metrics" + } + children { + node_id: 193 + local_name: "layers" + } + children { + node_id: 115 + 
local_name: "regularization_losses" + } + children { + node_id: 194 + local_name: "non_trainable_variables" + } + children { + node_id: 116 + local_name: "trainable_variables" + } + children { + node_id: 246 + local_name: "__call__" + } + children { + node_id: 247 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 247 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 195 + local_name: "layer_metrics" + } + children { + node_id: 123 + local_name: "variables" + } + children { + node_id: 196 + local_name: "layer_regularization_losses" + } + children { + node_id: 197 + local_name: "metrics" + } + children { + node_id: 198 + local_name: "layers" + } + children { + node_id: 124 + local_name: "regularization_losses" + } + children { + node_id: 199 + local_name: "non_trainable_variables" + } + children { + node_id: 125 + local_name: 
"trainable_variables" + } + children { + node_id: 248 + local_name: "__call__" + } + children { + node_id: 249 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 249 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 200 + local_name: "layer_metrics" + } + children { + node_id: 127 + local_name: "variables" + } + children { + node_id: 201 + local_name: "layer_regularization_losses" + } + children { + node_id: 202 + local_name: "metrics" + } + children { + node_id: 203 + local_name: "layers" + } + children { + node_id: 128 + local_name: "regularization_losses" + } + children { + node_id: 204 + local_name: "non_trainable_variables" + } + children { + node_id: 129 + local_name: "trainable_variables" + } + children { + node_id: 250 + local_name: "__call__" + } + children { + node_id: 251 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 251 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 12 + local_name: "0" + } + children { + node_id: 13 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } 
+ } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 12 + local_name: "0" + } + children { + node_id: 13 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 205 + local_name: "layer_metrics" + } + children { + node_id: 131 + local_name: "variables" + } + children { + node_id: 206 + local_name: "layer_regularization_losses" + } + children { + node_id: 207 + local_name: "metrics" + } + children { + node_id: 208 + local_name: "layers" + } + children { + node_id: 132 + local_name: "regularization_losses" + } + children { + node_id: 209 + local_name: "non_trainable_variables" + } + children { + node_id: 133 + local_name: "trainable_variables" + } + children { + node_id: 252 + local_name: "__call__" + } + children { + node_id: 253 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 253 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { 
+ user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { 
+ user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + function { + concrete_functions: "__inference_polymorphic_action_fn_4619080" + concrete_functions: "__inference_polymorphic_action_fn_946" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "time_step" + } + values { + string_value: "policy_state" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + tuple_value { + values { + tuple_value { + } + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_function_722" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + none_value { + } + } + } + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference__728" + } + } + nodes { + 
bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619026" + argument_keywords: "callee_basic_block_count" + argument_keywords: "callee_conditionally_executed_blocks" + argument_keywords: "callee_users" + argument_keywords: "caller_basic_block_count" + argument_keywords: "caller_conditionally_executed_blocks" + argument_keywords: "caller_users" + argument_keywords: "callsite_height" + argument_keywords: "cost_estimate" + argument_keywords: "discount" + argument_keywords: "edge_count" + argument_keywords: "inlining_default" + argument_keywords: "node_count" + argument_keywords: "nr_ctant_params" + argument_keywords: "reward" + argument_keywords: "step_type" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619033" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619048" + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + 
values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + 
string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + 
value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + 
string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + 
} + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + 
} + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values 
{ + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" 
+ value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { 
+ key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { 
+ fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + 
string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + 
} + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + 
} + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values 
{ + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value 
{ + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + 
key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + concrete_functions { + key: "__inference__728" + value { + bound_inputs: 4 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + } + dtype: DT_INT64 + } + } + } + } + concrete_functions { + key: "__inference_function_722" + value { + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + } + } + } + } + concrete_functions { + key: "__inference_polymorphic_action_fn_4619080" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + named_tuple_value { + name: "TimeStep" + values { + key: "step_type" + value { + tensor_spec_value { 
+ name: "time_step/step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + values { + key: "reward" + value { + tensor_spec_value { + name: "time_step/reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "discount" + value { + tensor_spec_value { + name: "time_step/discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "observation" + value { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "time_step/observation/callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "time_step/observation/callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "time_step/observation/callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "time_step/observation/caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "time_step/observation/caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "time_step/observation/caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "time_step/observation/callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "time_step/observation/cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: 
DT_INT64 + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "time_step/observation/edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "time_step/observation/inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "time_step/observation/node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "time_step/observation/nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + } + values { + tuple_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + named_tuple_value { + name: "PolicyStep" + values { + key: "action" + value { + tensor_spec_value { + name: "action" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + values { + key: "state" + value { + tuple_value { + } + } + } + values { + key: "info" + value { + tuple_value { + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_polymorphic_action_fn_946" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + named_tuple_value { + name: "TimeStep" + values { + key: "step_type" + value { + tensor_spec_value { + name: "step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + values { + key: "reward" + value { + tensor_spec_value { + name: "reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "discount" + value { + tensor_spec_value { + name: "discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "observation" + value { + dict_value 
{ + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } 
+ } + } + } + } + } + } + } + values { + tuple_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + named_tuple_value { + name: "PolicyStep" + values { + key: "action" + value { + tensor_spec_value { + name: "action" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + values { + key: "state" + value { + tuple_value { + } + } + } + values { + key: "info" + value { + tuple_value { + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619026" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + 
fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "discount" + value { + tensor_spec_value { + name: "discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "reward" + value { + tensor_spec_value { + name: "reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + fields { + key: "step_type" + value { + tensor_spec_value { + name: "step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + } + } + } + } + output_signature { + dict_value { + fields { + key: "inlining_decision" + value { + tensor_spec_value { + name: "inlining_decision" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619033" + value { + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619048" + value { + bound_inputs: 4 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + fields { + key: "int64" + value { 
+ tensor_spec_value { + name: "int64" + shape { + } + dtype: DT_INT64 + } + } + } + } + } + } + } + } +} + diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.data-00001-of-00002 b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 similarity index 77% rename from llvm/lib/Analysis/models/inliner/variables/variables.data-00001-of-00002 rename to llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 index 1f1f1b151a71f..ee7d7060867e7 100644 Binary files a/llvm/lib/Analysis/models/inliner/variables/variables.data-00001-of-00002 and b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 differ diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00002 b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00002 deleted file mode 100644 index 58ebd0fc98711..0000000000000 Binary files a/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00002 and /dev/null differ diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.index b/llvm/lib/Analysis/models/inliner/variables/variables.index index 318d5a2443c2b..7e0c10c1780e0 100644 Binary files a/llvm/lib/Analysis/models/inliner/variables/variables.index and b/llvm/lib/Analysis/models/inliner/variables/variables.index differ diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index e3a52c7882a2f..c9f21ee83826a 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7799,6 +7799,9 @@ bool LLParser::ParseTypeTestResolution(TypeTestResolution &TTRes) { return true; switch (Lex.getKind()) { + case lltok::kw_unknown: + TTRes.TheKind = TypeTestResolution::Unknown; + break; case lltok::kw_unsat: TTRes.TheKind = TypeTestResolution::Unsat; break; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4d7c36041398f..f8f7b74baf916 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ 
b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3057,25 +3057,40 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { if (MBB.pred_empty() || (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) && - !MBB.isEHFuncletEntry())) { + !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":", false); } } else { + if (isVerbose() && MBB.hasLabelMustBeEmitted()) { + OutStreamer->AddComment("Label of block must be emitted"); + } + auto *BBSymbol = MBB.getSymbol(); // Switch to a new section if this basic block must begin a section. if (MBB.isBeginSection()) { OutStreamer->SwitchSection( getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), MBB, TM)); - CurrentSectionBeginSym = MBB.getSymbol(); + CurrentSectionBeginSym = BBSymbol; } - OutStreamer->emitLabel(MBB.getSymbol()); + OutStreamer->emitLabel(BBSymbol); + // With BB sections, each basic block must handle CFI information on its own + // if it begins a section. + if (MBB.isBeginSection()) + for (const HandlerInfo &HI : Handlers) + HI.Handler->beginBasicBlock(MBB); } } -void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {} +void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { + // Check if CFI information needs to be updated for this MBB with basic block + // sections. 
+ if (MBB.isEndSection()) + for (const HandlerInfo &HI : Handlers) + HI.Handler->endBasicBlock(MBB); +} void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index f7041c0cc9263..3f053c7a38c77 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -101,21 +101,21 @@ class CVMCAdapter : public CodeViewRecordStreamer { CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable) : OS(&OS), TypeTable(TypeTable) {} - void emitBytes(StringRef Data) { OS->emitBytes(Data); } + void emitBytes(StringRef Data) override { OS->emitBytes(Data); } - void emitIntValue(uint64_t Value, unsigned Size) { + void emitIntValue(uint64_t Value, unsigned Size) override { OS->emitIntValueInHex(Value, Size); } - void emitBinaryData(StringRef Data) { OS->emitBinaryData(Data); } + void emitBinaryData(StringRef Data) override { OS->emitBinaryData(Data); } - void AddComment(const Twine &T) { OS->AddComment(T); } + void AddComment(const Twine &T) override { OS->AddComment(T); } - void AddRawComment(const Twine &T) { OS->emitRawComment(T); } + void AddRawComment(const Twine &T) override { OS->emitRawComment(T); } - bool isVerboseAsm() { return OS->isVerboseAsm(); } + bool isVerboseAsm() override { return OS->isVerboseAsm(); } - std::string getTypeName(TypeIndex TI) { + std::string getTypeName(TypeIndex TI) override { std::string TypeName; if (!TI.isNoneType()) { if (TI.isSimple()) diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index edf82fbed650a..713a15dd09391 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -472,10 +472,17 @@ void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of expression value in bytes. 
/// unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_data4) return 4; - if (Form == dwarf::DW_FORM_sec_offset) return 4; - if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getPointerSize(); + switch (Form) { + case dwarf::DW_FORM_data4: + return 4; + case dwarf::DW_FORM_data8: + return 8; + case dwarf::DW_FORM_sec_offset: + // FIXME: add support for DWARF64 + return 4; + default: + llvm_unreachable("DIE Value form not supported yet"); + } } LLVM_DUMP_METHOD diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 3245ecdbcc880..11ed1062f77e4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -47,7 +47,7 @@ void DwarfCFIExceptionBase::markFunctionEnd() { } void DwarfCFIExceptionBase::endFragment() { - if (shouldEmitCFI) + if (shouldEmitCFI && !Asm->MF->hasBBSections()) Asm->OutStreamer->emitCFIEndProc(); } @@ -172,3 +172,12 @@ void DwarfCFIException::endFunction(const MachineFunction *MF) { emitExceptionTable(); } + +void DwarfCFIException::beginBasicBlock(const MachineBasicBlock &MBB) { + beginFragment(&MBB, getExceptionSym); +} + +void DwarfCFIException::endBasicBlock(const MachineBasicBlock &MBB) { + if (shouldEmitCFI) + Asm->OutStreamer->emitCFIEndProc(); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 296c380ae5508..d90e49c1c2511 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -260,7 +260,9 @@ void DwarfCompileUnit::addLocationAttribute( : dwarf::DW_OP_const8u); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, + addExpr(*Loc, + PointerSize == 4 ? 
dwarf::DW_FORM_data4 + : dwarf::DW_FORM_data8, Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index d7a4b2abf52b3..ad2f2f3edd8e6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -325,7 +325,7 @@ class DwarfDebug : public DebugHandlerBase { const MachineFunction *CurFn = nullptr; /// If nonnull, stores the CU in which the previous subprogram was contained. - const DwarfCompileUnit *PrevCU; + const DwarfCompileUnit *PrevCU = nullptr; /// As an optimization, there is no need to emit an entry in the directory /// table for the same directory as DW_AT_comp_dir. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 24bbf58b91ec9..c2956380438f9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -66,6 +66,9 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { void beginFragment(const MachineBasicBlock *MBB, ExceptionSymbolProvider ESP) override; + + void beginBasicBlock(const MachineBasicBlock &MBB) override; + void endBasicBlock(const MachineBasicBlock &MBB) override; }; class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index cb9f8f2e01845..23c7fea01f282 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -303,28 +303,31 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { auto MBBI = MBBInfo.MBB->begin(); DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI); - if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If the current MBB will be placed in a unique section, a full DefCfa + // must be emitted. 
+ const bool ForceFullCFA = MBB.isBeginSection(); + + if ((PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset && + PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) || + ForceFullCFA) { // If both outgoing offset and register of a previous block don't match - // incoming offset and register of this block, add a def_cfa instruction - // with the correct offset and register for this block. - if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); - BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - // If outgoing offset of a previous block doesn't match incoming offset - // of this block, add a def_cfa_offset instruction with the correct - // offset for this block. - } else { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( - nullptr, getCorrectCFAOffset(&MBB))); - BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + // incoming offset and register of this block, or if this block begins a + // section, add a def_cfa instruction with the correct offset and + // register for this block. + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } else if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If outgoing offset of a previous block doesn't match incoming offset + // of this block, add a def_cfa_offset instruction with the correct + // offset for this block. 
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( + nullptr, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); InsertedCFIInstr = true; - // If outgoing register of a previous block doesn't match incoming - // register of this block, add a def_cfa_register instruction with the - // correct register for this block. } else if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { unsigned CFIIndex = @@ -335,6 +338,14 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { InsertedCFIInstr = true; } + if (ForceFullCFA) { + MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMoves( + *MBBInfo.MBB, MBBI); + InsertedCFIInstr = true; + PrevMBBInfo = &MBBInfo; + continue; + } + BitVector SetDifference = PrevMBBInfo->OutgoingCSRSaved; SetDifference.reset(MBBInfo.IncomingCSRSaved); for (int Reg : SetDifference.set_bits()) { diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 5d6ee09c84387..254503673fd2b 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -161,6 +161,17 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, std::pair TargetHint = mri.getRegAllocationHint(li.reg); + if (li.isSpillable() && VRM) { + Register Reg = li.reg; + Register Original = VRM->getOriginal(Reg); + const LiveInterval &OrigInt = LIS.getInterval(Original); + // li comes from a split of OrigInt. If OrigInt was marked + // as not spillable, make sure the new interval is marked + // as not spillable as well. + if (!OrigInt.isSpillable()) + li.markNotSpillable(); + } + // Don't recompute spill weight for an unspillable register. 
bool Spillable = li.isSpillable(); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index e8b8e6c93cf0d..465ba08dbfcdb 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -376,6 +376,7 @@ class TypePromotionTransaction; return *DT; } + void removeAllAssertingVHReferences(Value *V); bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); @@ -383,6 +384,7 @@ class TypePromotionTransaction; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader); + bool makeBitReverse(Instruction &I); bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, @@ -601,6 +603,33 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// An instruction is about to be deleted, so remove all references to it in our +/// GEP-tracking data strcutures. +void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { + LargeOffsetGEPMap.erase(V); + NewGEPBases.erase(V); + + auto GEP = dyn_cast(V); + if (!GEP) + return; + + LargeOffsetGEPID.erase(GEP); + + auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand()); + if (VecI == LargeOffsetGEPMap.end()) + return; + + auto &GEPVector = VecI->second; + const auto &I = std::find_if(GEPVector.begin(), GEPVector.end(), + [=](auto &Elt) { return Elt.first == GEP; }); + if (I == GEPVector.end()) + return; + + GEPVector.erase(I); + if (GEPVector.empty()) + LargeOffsetGEPMap.erase(VecI); +} + // Verify BFI has been updated correctly by recomputing BFI and comparing them. 
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { DominatorTree NewDT(F); @@ -5242,7 +5271,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakTrackingVH IterHandle(CurValue); BasicBlock *BB = CurInstIterator->getParent(); - RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); + RecursivelyDeleteTriviallyDeadInstructions( + Repl, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); if (IterHandle != CurValue) { // If the iterator instruction was recursively deleted, start over at the @@ -5363,7 +5394,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, // If we have no uses, recursively delete the value and all dead instructions // using it. if (Ptr->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo); + RecursivelyDeleteTriviallyDeadInstructions( + Ptr, TLInfo, nullptr, + [&](Value *V) { removeAllAssertingVHReferences(V); }); return true; } @@ -6647,7 +6680,8 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); SVI->replaceAllUsesWith(BC2); - RecursivelyDeleteTriviallyDeadInstructions(SVI); + RecursivelyDeleteTriviallyDeadInstructions( + SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); // Also hoist the bitcast up to its operand if it they are not in the same // block. @@ -7604,11 +7638,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { /// Given an OR instruction, check to see if this is a bitreverse /// idiom. If so, insert the new intrinsic and return true. 
-static bool makeBitReverse(Instruction &I, const DataLayout &DL, - const TargetLowering &TLI) { +bool CodeGenPrepare::makeBitReverse(Instruction &I) { if (!I.getType()->isIntegerTy() || - !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, - TLI.getValueType(DL, I.getType(), true))) + !TLI->isOperationLegalOrCustom(ISD::BITREVERSE, + TLI->getValueType(*DL, I.getType(), true))) return false; SmallVector Insts; @@ -7616,7 +7649,8 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, return false; Instruction *LastInst = Insts.back(); I.replaceAllUsesWith(LastInst); - RecursivelyDeleteTriviallyDeadInstructions(&I); + RecursivelyDeleteTriviallyDeadInstructions( + &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); return true; } @@ -7638,7 +7672,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { while (MadeBitReverse) { MadeBitReverse = false; for (auto &I : reverse(BB)) { - if (makeBitReverse(I, *DL, *TLI)) { + if (makeBitReverse(I)) { MadeBitReverse = MadeChange = true; break; } diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index c4d8777615d27..071cc5b737358 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -59,6 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_EXTRACT: return true; } return false; diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 88173dc4d302c..6f8fd309e1b58 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -70,6 +70,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const { switch (Op.getSrcOpKind()) { + case SrcOp::SrcType::Ty_Imm: + 
B.addNodeIDImmediate(static_cast(Op.getImm())); + break; case SrcOp::SrcType::Ty_Predicate: B.addNodeIDImmediate(static_cast(Op.getPredicate())); break; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 32bad28d318ba..194961ae3b216 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -576,6 +576,24 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI, return isPredecessor(DefMI, UseMI); } +bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register SrcReg = MI.getOperand(1).getReg(); + unsigned SrcSignBits = KB->computeNumSignBits(SrcReg); + unsigned NumSextBits = + MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() - + MI.getOperand(2).getImm(); + return SrcSignBits >= NumSextBits; +} + +bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + MachineIRBuilder MIB(MI); + MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset) { auto &MF = *MI.getParent()->getParent(); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index e5d77b0eb8578..0e9c6e4fab9f9 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -11,6 +11,7 @@ // //===------------------ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -441,6 +442,16 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Tmp = DstTy.getScalarSizeInBits() - 
SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } + case TargetOpcode::G_SEXTLOAD: { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + // TODO: add vector support + if (Ty.isVector()) + break; + if (MI.hasOneMemOperand()) + return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits(); + break; + } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index bbdefe3e5ca4d..8f6643b2f1935 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1598,6 +1598,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. return true; + case Intrinsic::read_volatile_register: case Intrinsic::read_register: { Value *Arg = CI.getArgOperand(0); MIRBuilder diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 241d5bace248b..2ce1d414e7550 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -237,6 +237,39 @@ static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) { return InlineAsm::getNumOperandRegisters(Flag); } +static bool buildAnyextOrCopy(Register Dst, Register Src, + MachineIRBuilder &MIRBuilder) { + const TargetRegisterInfo *TRI = + MIRBuilder.getMF().getSubtarget().getRegisterInfo(); + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + + auto SrcTy = MRI->getType(Src); + if (!SrcTy.isValid()) { + LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n"); + return false; + } + unsigned SrcSize = TRI->getRegSizeInBits(Src, *MRI); + unsigned DstSize = TRI->getRegSizeInBits(Dst, *MRI); + + if (DstSize < SrcSize) { + 
LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n"); + return false; + } + + // Attempt to anyext small scalar sources. + if (DstSize > SrcSize) { + if (!SrcTy.isScalar()) { + LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of" + "destination register class\n"); + return false; + } + Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0); + } + + MIRBuilder.buildCopy(Dst, Src); + return true; +} + bool InlineAsmLowering::lowerInlineAsm( MachineIRBuilder &MIRBuilder, const CallBase &Call, std::function(const Value &Val)> GetOrCreateVRegs) @@ -427,7 +460,8 @@ bool InlineAsmLowering::lowerInlineAsm( ArrayRef SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert(SrcRegs.size() == 1 && "Single register is expected here"); Register Tmp = MRI->createVirtualRegister(RC); - MIRBuilder.buildCopy(Tmp, SrcRegs[0]); + if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder)) + return false; // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def. unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); @@ -525,7 +559,8 @@ bool InlineAsmLowering::lowerInlineAsm( unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); Inst.addImm(Flag); - MIRBuilder.buildCopy(OpInfo.Regs[0], SourceRegs[0]); + if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder)) + return false; Inst.addReg(OpInfo.Regs[0]); break; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 38590656d1f4b..5b6937e471cdc 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1638,6 +1638,44 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || + MI.getOpcode() == 
TargetOpcode::G_SSUBSAT; + // We can convert this to: + // 1. Any extend iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // + // It may be more efficient to lower this to a min and a max operation in + // the higher precision arithmetic if the promoted operation isn't legal, + // but this decision is up to the target's lowering request. + Register DstReg = MI.getOperand(0).getReg(); + + unsigned NewBits = WideTy.getScalarSizeInBits(); + unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); + + auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); + auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); + auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); + auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK); + + auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, + {ShiftL, ShiftR}, MI.getFlags()); + + // Use a shift that will preserve the number of sign bits when the trunc is + // folded away. + auto Result = IsSigned ? 
MIRBuilder.buildAShr(WideTy, WideInst, ShiftK) + : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK); + + MIRBuilder.buildTrunc(DstReg, Result); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { @@ -1674,6 +1712,11 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + return widenScalarAddSubSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1865,21 +1908,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SITOFP: - if (TypeIdx != 1) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UITOFP: - if (TypeIdx != 1) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: @@ -3414,6 +3461,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSHL: case G_FSHR: case G_FREEZE: + case G_SADDSAT: + case G_SSUBSAT: + case G_UADDSAT: + case G_USUBSAT: return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: diff --git 
a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 0ba7e920e5075..ffffc7c243d83 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -402,8 +402,15 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *BB = L->getLoopPreheader(); if (UseLoopGuard && BB->getSinglePredecessor() && - cast(BB->getTerminator())->isUnconditional()) - BB = BB->getSinglePredecessor(); + cast(BB->getTerminator())->isUnconditional()) { + BasicBlock *Predecessor = BB->getSinglePredecessor(); + // If it's not safe to create a while loop then don't force it and create a + // do-while loop instead + if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) + UseLoopGuard = false; + else + BB = Predecessor; + } if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 6610491dd111d..4fba8f3842550 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -89,10 +89,9 @@ LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { return VirtRegInfo[RegIdx]; } -void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, - MachineBasicBlock *DefBlock, - MachineBasicBlock *MBB, - std::vector &WorkList) { +void LiveVariables::MarkVirtRegAliveInBlock( + VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB, + SmallVectorImpl &WorkList) { unsigned BBNum = MBB->getNumber(); // Check to see if this basic block is one of the killing blocks. 
If so, @@ -118,7 +117,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB) { - std::vector WorkList; + SmallVector WorkList; MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList); while (!WorkList.empty()) { diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index d4181591deabc..5fbf91e26a89f 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -147,6 +147,10 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) setFlags(MI.Flags); } +void MachineInstr::moveBefore(MachineInstr *MovePos) { + MovePos->getParent()->splice(MovePos, getParent(), getIterator()); +} + /// getRegInfo - If this instruction is embedded into a MachineFunction, /// return the MachineRegisterInfo object for the current function, otherwise /// return null. diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index cf75d531deb27..d9d0a783f8a22 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2724,7 +2724,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone) { if (Zone.isTop()) { - if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + // Prefer the candidate with the lesser depth, but only if one of them has + // depth greater than the total latency scheduled so far, otherwise either + // of them could be scheduled now with no stall. 
+ if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) > + Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) return true; @@ -2733,7 +2737,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, TryCand, Cand, GenericSchedulerBase::TopPathReduce)) return true; } else { - if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + // Prefer the candidate with the lesser height, but only if one of them has + // height greater than the total latency scheduled so far, otherwise either + // of them could be scheduled now with no stall. + if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) > + Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0d84cd89f5aee..f14b3dba4f318 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7694,6 +7694,12 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt); } + // rot i16 X, 8 --> bswap X + auto *RotAmtC = isConstOrConstSplat(N1); + if (RotAmtC && RotAmtC->getAPIntValue() == 8 && + VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT)) + return DAG.getNode(ISD::BSWAP, dl, VT, N0); + // Simplify the operands using demanded-bits information. 
if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -11986,6 +11992,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDNodeFlags Flags = N->getFlags(); bool CanFuse = Options.UnsafeFPMath || isContractable(N); + bool CanReassociate = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || CanFuse || HasFMAD); // If the addition is not contractable, do not combine. @@ -12028,13 +12036,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) + // This requires reassociation because it changes the order of operations. SDValue FMA, E; - if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && + if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && N0.getOperand(2).hasOneUse()) { FMA = N0; E = N1; - } else if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && + } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && N1.getOperand(2).hasOneUse()) { FMA = N1; @@ -14090,8 +14099,8 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { } } - // Transform br(xor(x, y)) -> br(x != y) - // Transform br(xor(xor(x,y), 1)) -> br (x == y) + // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne)) + // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq)) if (N.getOpcode() == ISD::XOR) { // Because we may call this on a speculatively constructed // SimplifiedSetCC Node, we need to simplify this node first. 
@@ -14115,16 +14124,17 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (N.getOpcode() != ISD::XOR) return N; - SDNode *TheXor = N.getNode(); - - SDValue Op0 = TheXor->getOperand(0); - SDValue Op1 = TheXor->getOperand(1); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (isOneConstant(Op0) && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); + // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq)) + if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR && + Op0.getValueType() == MVT::i1) { + N = Op0; + Op0 = N->getOperand(0); + Op1 = N->getOperand(1); Equal = true; } @@ -14132,7 +14142,7 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); // Replace the uses of XOR with SETCC - return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); } } @@ -15759,7 +15769,14 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { ST->getPointerInfo().getAddrSpace() != 0) return SDValue(); - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + TypeSize VTSize = VT.getSizeInBits(); + + // We don't know the size of scalable types at compile time so we cannot + // create an integer of the equivalent size. + if (VTSize.isScalable()) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || @@ -16538,14 +16555,27 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, } while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { - // If we have load/store pair instructions and we only have two values, - // don't bother merging. 
Align RequiredAlignment; - if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); - break; + bool NeedRotate = false; + if (LoadNodes.size() == 2) { + // If we have load/store pair instructions and we only have two values, + // don't bother merging. + if (TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + break; + } + // If the loads are reversed, see if we can rotate the halves into place. + int64_t Offset0 = LoadNodes[0].OffsetFromBase; + int64_t Offset1 = LoadNodes[1].OffsetFromBase; + EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2); + if (Offset0 - Offset1 == ElementSizeBytes && + (hasOperation(ISD::ROTL, PairVT) || + hasOperation(ISD::ROTR, PairVT))) { + std::swap(LoadNodes[0], LoadNodes[1]); + NeedRotate = true; + } } LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); @@ -16710,8 +16740,18 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags); + SDValue StoreOp = NewLoad; + if (NeedRotate) { + unsigned LoadWidth = ElementSizeBytes * 8 * 2; + assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && + "Unexpected type for rotate-able load pair"); + SDValue RotAmt = + DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL); + // Target can convert to the identical ROTR if it does not have ROTL. 
+ StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt); + } NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); } else { // This must be the truncstore/extload case EVT ExtendedTy = diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 6a6004c158bb8..cbbcaf1601ed1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1411,6 +1411,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SmallVector Stores; unsigned TypeByteSize = MemVT.getSizeInBits() / 8; assert(TypeByteSize > 0 && "Vector element type too small for stack store!"); + + // If the destination vector element type of a BUILD_VECTOR is narrower than + // the source element type, only store the bits necessary. + bool Truncate = isa(Node) && + MemVT.bitsLT(Node->getOperand(0).getValueType()); + // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. @@ -1420,9 +1426,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl); - // If the destination vector element type is narrower than the source - // element type, only store the bits necessary. - if (MemVT.bitsLT(Node->getOperand(i).getValueType())) + if (Truncate) Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset), MemVT)); @@ -3315,7 +3319,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. 
if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0), - DAG.getMachineFunction().getFunction().hasOptSize())) + DAG.shouldOptForSize())) Results.push_back(ExpandConstantFP(CFP, true)); break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4bc75ceb4928e..0fa6d653a8364 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -776,6 +776,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + // Helper function for incrementing the pointer when splitting + // memory operations + void IncrementPointer(MemSDNode *N, EVT MemVT, + MachinePointerInfo &MPI, SDValue &Ptr); + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. void SplitVectorResult(SDNode *N, unsigned ResNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 550174f0df72a..414ba25ffd5ff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -976,6 +976,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, + MachinePointerInfo &MPI, + SDValue &Ptr) { + SDLoc DL(N); + unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8; + + if (MemVT.isScalableVector()) { + SDValue BytesIncrement = DAG.getVScale( + DL, Ptr.getValueType(), + APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement); + } else { + MPI = N->getPointerInfo().getWithOffset(IncrementSize); + // 
Increment the pointer to the other half. + Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + } +} + void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -1537,19 +1556,8 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo); - unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8; - MachinePointerInfo MPI; - if (LoVT.isScalableVector()) { - SDValue BytesIncrement = DAG.getVScale( - dl, Ptr.getValueType(), - APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); - MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, BytesIncrement); - } else { - MPI = LD->getPointerInfo().getWithOffset(IncrementSize); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - } + IncrementPointer(LD, LoMemVT, MPI, Ptr); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI, HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo); @@ -2489,8 +2497,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) return TLI.scalarizeVectorStore(N, DAG); - unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8; - if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); @@ -2499,17 +2505,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { AAInfo); MachinePointerInfo MPI; - if (LoMemVT.isScalableVector()) { - SDValue BytesIncrement = DAG.getVScale( - DL, Ptr.getValueType(), - APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); - MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement); - } else { - MPI = N->getPointerInfo().getWithOffset(IncrementSize); - // Increment the 
pointer to the other half. - Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); - } + IncrementPointer(N, LoMemVT, MPI, Ptr); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI, diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index ce20d506586f0..17c68f2bf73b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1034,7 +1034,29 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } InsertPos = Emitter.getInsertPos(); - return Emitter.getBlock(); + // In some cases, DBG_VALUEs might be inserted after the first terminator, + // which results in an invalid MBB. If that happens, move the DBG_VALUEs + // before the first terminator. + MachineBasicBlock *InsertBB = Emitter.getBlock(); + auto FirstTerm = InsertBB->getFirstTerminator(); + if (FirstTerm != InsertBB->end()) { + assert(!FirstTerm->isDebugValue() && + "first terminator cannot be a debug value"); + for (MachineInstr &MI : make_early_inc_range( + make_range(std::next(FirstTerm), InsertBB->end()))) { + if (!MI.isDebugValue()) + continue; + + if (&MI == InsertPos) + InsertPos = std::prev(InsertPos->getIterator()); + + // The DBG_VALUE was referencing a value produced by a terminator. By + // moving the DBG_VALUE, the referenced value also needs invalidating. + MI.getOperand(0).ChangeToRegister(0, false); + MI.moveBefore(&*FirstTerm); + } + } + return InsertBB; } /// Return the basic block label. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8065091208694..592c09c10fb08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2718,6 +2718,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::EXTRACT_SUBVECTOR: { // Offset the demanded elts by the subvector index. 
SDValue Src = Op.getOperand(0); + // Bail until we can represent demanded elements for scalable vectors. + if (Src.getValueType().isScalableVector()) + break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); @@ -3973,6 +3976,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::EXTRACT_SUBVECTOR: { // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); + // Bail until we can represent demanded elements for scalable vectors. + if (Src.getValueType().isScalableVector()) + break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c8b72abb9b7d6..c0f84055cfceb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5698,6 +5698,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; + case Intrinsic::read_volatile_register: case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); @@ -9570,7 +9571,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // initializes the alloca. Don't elide copies from the same argument twice. 
const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast(Val); - if (!Arg || Arg->hasPassPointeeByValueAttr() || + if (!Arg || Arg->hasPassPointeeByValueCopyAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 96df20039b15d..d140b15067a6e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7153,6 +7153,9 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() && "Incompatible types of Data and Mask"); if (IsCompressedMemory) { + if (DataVT.isScalableVector()) + report_fatal_error( + "Cannot currently handle compressed memory with scalable vectors"); // Incrementing the pointer according to number of '1's in the mask. EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); @@ -7168,6 +7171,10 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); + } else if (DataVT.isScalableVector()) { + Increment = DAG.getVScale(DL, AddrVT, + APInt(AddrVT.getSizeInBits().getFixedSize(), + DataVT.getStoreSize().getKnownMinSize())); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); @@ -7324,6 +7331,11 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { "addition or subtraction node."); } + // FIXME: Should really try to split the vector in case it's legal on a + // subvector. 
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + unsigned BitWidth = LHS.getScalarValueSizeInBits(); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e0fdb0cefcb8b..ffff56f545b2d 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -879,7 +879,7 @@ bool TargetPassConfig::addISelPasses() { addPass(createLowerEmuTLSPass()); addPass(createPreISelIntrinsicLoweringPass()); - addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index de336abe607a5..615ff4b8789c0 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -70,7 +70,6 @@ STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); -STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); @@ -109,10 +108,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // Set of already processed instructions in the current block. SmallPtrSet Processed; - // Set of instructions converted to three-address by target and then sunk - // down current basic block. 
- SmallPtrSet SunkInstrs; - // A map from virtual registers to physical registers which are likely targets // to be coalesced to due to copies from physical registers to virtual // registers. e.g. v1024 = move r0. @@ -123,9 +118,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // registers. e.g. r1 = move v1024. DenseMap DstRegMap; - bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, - MachineBasicBlock::iterator OldPos); - bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen); bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); @@ -209,136 +201,6 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); -/// A two-address instruction has been converted to a three-address instruction -/// to avoid clobbering a register. Try to sink it past the instruction that -/// would kill the above mentioned register to reduce register pressure. -bool TwoAddressInstructionPass:: -sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, - MachineBasicBlock::iterator OldPos) { - // FIXME: Shouldn't we be trying to do this before we three-addressify the - // instruction? After this transformation is done, we no longer need - // the instruction to be in three-address form. - - // Check if it's safe to move this instruction. - bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(AA, SeenStore)) - return false; - - unsigned DefReg = 0; - SmallSet UseRegs; - - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg()) - continue; - Register MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MO.isUse() && MOReg != SavedReg) - UseRegs.insert(MO.getReg()); - if (!MO.isDef()) - continue; - if (MO.isImplicit()) - // Don't try to move it if it implicitly defines a register. - return false; - if (DefReg) - // For now, don't move any instructions that define multiple registers. 
- return false; - DefReg = MO.getReg(); - } - - // Find the instruction that kills SavedReg. - MachineInstr *KillMI = nullptr; - if (LIS) { - LiveInterval &LI = LIS->getInterval(SavedReg); - assert(LI.end() != LI.begin() && - "Reg should not have empty live interval."); - - SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); - LiveInterval::const_iterator I = LI.find(MBBEndIdx); - if (I != LI.end() && I->start < MBBEndIdx) - return false; - - --I; - KillMI = LIS->getInstructionFromIndex(I->end); - } - if (!KillMI) { - for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { - if (!UseMO.isKill()) - continue; - KillMI = UseMO.getParent(); - break; - } - } - - // If we find the instruction that kills SavedReg, and it is in an - // appropriate location, we can try to sink the current instruction - // past it. - if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - MachineBasicBlock::iterator(KillMI) == OldPos || KillMI->isTerminator()) - return false; - - // If any of the definitions are used by another instruction between the - // position and the kill use, then it's not safe to sink it. - // - // FIXME: This can be sped up if there is an easy way to query whether an - // instruction is before or after another instruction. Then we can use - // MachineRegisterInfo def / use instead. - MachineOperand *KillMO = nullptr; - MachineBasicBlock::iterator KillPos = KillMI; - ++KillPos; - - unsigned NumVisited = 0; - for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) - continue; - if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
- return false; - ++NumVisited; - for (unsigned i = 0, e = OtherMI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = OtherMI.getOperand(i); - if (!MO.isReg()) - continue; - Register MOReg = MO.getReg(); - if (!MOReg) - continue; - if (DefReg == MOReg) - return false; - - if (MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))) { - if (&OtherMI == KillMI && MOReg == SavedReg) - // Save the operand that kills the register. We want to unset the kill - // marker if we can sink MI past it. - KillMO = &MO; - else if (UseRegs.count(MOReg)) - // One of the uses is killed before the destination. - return false; - } - } - } - assert(KillMO && "Didn't find kill"); - - if (!LIS) { - // Update kill and LV information. - KillMO->setIsKill(false); - KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); - KillMO->setIsKill(true); - - if (LV) - LV->replaceKillInstruction(SavedReg, *KillMI, *MI); - } - - // Move instruction to its destination. - MBB->remove(MI); - MBB->insert(KillPos, MI); - - if (LIS) - LIS->handleMove(*MI); - - ++Num3AddrSunk; - return true; -} - /// Return the MachineInstr* if it is the single def of the Reg in current BB. static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { @@ -740,26 +602,15 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - bool Sunk = false; if (LIS) LIS->ReplaceMachineInstrInMaps(*mi, *NewMI); - if (NewMI->findRegisterUseOperand(RegB, false, TRI)) - // FIXME: Temporary workaround. If the new instruction doesn't - // uses RegB, convertToThreeAddress must have created more - // then one instruction. - Sunk = sink3AddrInstruction(NewMI, RegB, mi); - MBB->erase(mi); // Nuke the old inst. 
- if (!Sunk) { - DistanceMap.insert(std::make_pair(NewMI, Dist)); - mi = NewMI; - nmi = std::next(mi); - } - else - SunkInstrs.insert(NewMI); + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = std::next(mi); // Update source and destination register maps. SrcRegMap.erase(RegA); @@ -1700,13 +1551,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); - SunkInstrs.clear(); for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = std::next(mi); - // Don't revisit an instruction previously converted by target. It may - // contain undef register operands (%noreg), which are not handled. - if (mi->isDebugInstr() || SunkInstrs.count(&*mi)) { + // Skip debug instructions. + if (mi->isDebugInstr()) { mi = nmi; continue; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp index 886fe1dff9769..fa0ceb4bbc01f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -104,10 +104,10 @@ DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding, Result = getSigned(Offset, 2); break; case dwarf::DW_EH_PE_sdata4: - Result = getSigned(Offset, 4); + Result = SignExtend64<32>(getRelocatedValue(4, Offset)); break; case dwarf::DW_EH_PE_sdata8: - Result = getSigned(Offset, 8); + Result = getRelocatedValue(8, Offset); break; default: return None; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 0a1b75592290c..ba7449baaf7f0 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -521,9 +521,9 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { "parsing FDE data at 0x%" PRIx64 " failed due to missing CIE", StartOffset); - if (auto Val = Data.getEncodedPointer( - &Offset, 
Cie->getFDEPointerEncoding(), - EHFrameAddress ? EHFrameAddress + Offset : 0)) { + if (auto Val = + Data.getEncodedPointer(&Offset, Cie->getFDEPointerEncoding(), + EHFrameAddress + Offset)) { InitialLocation = *Val; } if (auto Val = Data.getEncodedPointer( diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index d3c1cd5bb88f4..de5e11e084f47 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -410,6 +410,41 @@ static bool printCompactDWARFExpr(raw_ostream &OS, DWARFExpression::iterator I, S << MRI.getName(*LLVMRegNum); break; } + case dwarf::DW_OP_bregx: { + int DwarfRegNum = Op.getRawOperand(0); + int64_t Offset = Op.getRawOperand(1); + Optional LLVMRegNum = MRI.getLLVMRegNum(DwarfRegNum, false); + if (!LLVMRegNum) { + OS << ""; + return false; + } + raw_svector_ostream S(Stack.emplace_back().String); + S << MRI.getName(*LLVMRegNum); + if (Offset) + S << format("%+" PRId64, Offset); + break; + } + case dwarf::DW_OP_entry_value: + case dwarf::DW_OP_GNU_entry_value: { + // DW_OP_entry_value contains a sub-expression which must be rendered + // separately. + uint64_t SubExprLength = Op.getRawOperand(0); + DWARFExpression::iterator SubExprEnd = I.skipBytes(SubExprLength); + ++I; + raw_svector_ostream S(Stack.emplace_back().String); + S << "entry("; + printCompactDWARFExpr(S, I, SubExprEnd, MRI); + S << ")"; + I = SubExprEnd; + continue; + } + case dwarf::DW_OP_stack_value: { + // The top stack entry should be treated as the actual value of tne + // variable, rather than the address of the variable in memory. 
+ assert(!Stack.empty()); + Stack.back().Kind = PrintedExpr::Value; + break; + } default: if (Opcode >= dwarf::DW_OP_reg0 && Opcode <= dwarf::DW_OP_reg31) { // DW_OP_reg: A register, with the register num implied by the @@ -422,6 +457,19 @@ static bool printCompactDWARFExpr(raw_ostream &OS, DWARFExpression::iterator I, } raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); S << MRI.getName(*LLVMRegNum); + } else if (Opcode >= dwarf::DW_OP_breg0 && + Opcode <= dwarf::DW_OP_breg31) { + int DwarfRegNum = Opcode - dwarf::DW_OP_breg0; + int64_t Offset = Op.getRawOperand(0); + Optional LLVMRegNum = MRI.getLLVMRegNum(DwarfRegNum, false); + if (!LLVMRegNum) { + OS << ""; + return false; + } + raw_svector_ostream S(Stack.emplace_back().String); + S << MRI.getName(*LLVMRegNum); + if (Offset) + S << format("%+" PRId64, Offset); } else { // If we hit an unknown operand, we don't know its effect on the stack, // so bail out on the whole expression. @@ -435,7 +483,11 @@ static bool printCompactDWARFExpr(raw_ostream &OS, DWARFExpression::iterator I, } assert(Stack.size() == 1 && "expected one value on stack"); - OS << Stack.front().String; + + if (Stack.front().Kind == PrintedExpr::Address) + OS << "[" << Stack.front().String << "]"; + else + OS << Stack.front().String; return true; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp index 5f5f12a390833..2124a49bef606 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp @@ -29,13 +29,13 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data, uint8_t OffsetByteSize = Format == dwarf::DWARF64 ? 
8 : 4; uint64_t FullLength = HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format); - assert(FullLength == length()); if (FullLength < getHeaderSize(Format)) return createStringError(errc::invalid_argument, "%s table at offset 0x%" PRIx64 " has too small length (0x%" PRIx64 ") to contain a complete header", SectionName.data(), HeaderOffset, FullLength); + assert(FullLength == length() && "Inconsistent calculation of length."); uint64_t End = HeaderOffset + FullLength; if (!Data.isValidOffsetForDataOfSize(HeaderOffset, FullLength)) return createStringError(errc::invalid_argument, diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 505f03590b6b0..8b078690dea24 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -11,17 +11,194 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h" +#include "BasicGOTAndStubsBuilder.h" #include "JITLinkGeneric.h" #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Endian.h" #define DEBUG_TYPE "jitlink" using namespace llvm; using namespace llvm::jitlink; +using namespace llvm::jitlink::ELF_x86_64_Edges; + +namespace { +class ELF_x86_64_GOTAndStubsBuilder + : public BasicGOTAndStubsBuilder { +public: + static const uint8_t NullGOTEntryContent[8]; + static const uint8_t StubContent[6]; + + ELF_x86_64_GOTAndStubsBuilder(LinkGraph &G) + : BasicGOTAndStubsBuilder(G) {} + + bool isGOTEdge(Edge &E) const { + return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad; + } + + Symbol &createGOTEntry(Symbol &Target) { + auto &GOTEntryBlock = G.createContentBlock( + getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); + return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); + } + + void fixGOTEdge(Edge 
&E, Symbol &GOTEntry) { + assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) && + "Not a GOT edge?"); + // If this is a PCRel32GOT then change it to an ordinary PCRel32. If it is + // a PCRel32GOTLoad then leave it as-is for now. We will use the kind to + // check for GOT optimization opportunities in the + // optimizeMachO_x86_64_GOTAndStubs pass below. + if (E.getKind() == PCRel32GOT) + E.setKind(PCRel32); + + E.setTarget(GOTEntry); + // Leave the edge addend as-is. + } + + bool isExternalBranchEdge(Edge &E) { + return E.getKind() == Branch32 && !E.getTarget().isDefined(); + } + + Symbol &createStub(Symbol &Target) { + auto &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); + // Re-use GOT entries for stub targets. + auto &GOTEntrySymbol = getGOTEntrySymbol(Target); + StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false); + } + + void fixExternalBranchEdge(Edge &E, Symbol &Stub) { + assert(E.getKind() == Branch32 && "Not a Branch32 edge?"); + assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?"); + + // Set the edge kind to Branch32ToStub. We will use this to check for stub + // optimization opportunities in the optimize ELF_x86_64_GOTAndStubs pass + // below. 
+ E.setKind(Branch32ToStub); + E.setTarget(Stub); + } + +private: + Section &getGOTSection() { + if (!GOTSection) + GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ); + return *GOTSection; + } + + Section &getStubsSection() { + if (!StubsSection) { + auto StubsProt = static_cast( + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + StubsSection = &G.createSection("$__STUBS", StubsProt); + } + return *StubsSection; + } + + StringRef getGOTEntryBlockContent() { + return StringRef(reinterpret_cast(NullGOTEntryContent), + sizeof(NullGOTEntryContent)); + } + + StringRef getStubBlockContent() { + return StringRef(reinterpret_cast(StubContent), + sizeof(StubContent)); + } + + Section *GOTSection = nullptr; + Section *StubsSection = nullptr; +}; +} // namespace + +const uint8_t ELF_x86_64_GOTAndStubsBuilder::NullGOTEntryContent[8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +const uint8_t ELF_x86_64_GOTAndStubsBuilder::StubContent[6] = { + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00}; static const char *CommonSectionName = "__common"; +static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n"); + + for (auto *B : G.blocks()) + for (auto &E : B->edges()) + if (E.getKind() == PCRel32GOTLoad) { + assert(E.getOffset() >= 3 && "GOT edge occurs too early in block"); + + // Switch the edge kind to PCRel32: Whether we change the edge target + // or not this will be the desired kind. + E.setKind(PCRel32); + + // Optimize GOT references. + auto &GOTBlock = E.getTarget().getBlock(); + assert(GOTBlock.getSize() == G.getPointerSize() && + "GOT entry block should be pointer sized"); + assert(GOTBlock.edges_size() == 1 && + "GOT entry should only have one outgoing edge"); + + auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); + JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); + JITTargetAddress TargetAddr = GOTTarget.getAddress(); + + // Check that this is a recognized MOV instruction. 
+ // FIXME: Can we assume this? + constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b}; + if (strncmp(B->getContent().data() + E.getOffset() - 3, + reinterpret_cast(MOVQRIPRel), 2) != 0) + continue; + + int64_t Displacement = TargetAddr - EdgeAddr + 4; + if (Displacement >= std::numeric_limits::min() && + Displacement <= std::numeric_limits::max()) { + E.setTarget(GOTTarget); + auto *BlockData = reinterpret_cast( + const_cast(B->getContent().data())); + BlockData[E.getOffset() - 2] = 0x8d; + LLVM_DEBUG({ + dbgs() << " Replaced GOT load wih LEA:\n "; + printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind())); + dbgs() << "\n"; + }); + } + } else if (E.getKind() == Branch32ToStub) { + + // Switch the edge kind to PCRel32: Whether we change the edge target + // or not this will be the desired kind. + E.setKind(Branch32); + + auto &StubBlock = E.getTarget().getBlock(); + assert(StubBlock.getSize() == + sizeof(ELF_x86_64_GOTAndStubsBuilder::StubContent) && + "Stub block should be stub sized"); + assert(StubBlock.edges_size() == 1 && + "Stub block should only have one outgoing edge"); + + auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock(); + assert(GOTBlock.getSize() == G.getPointerSize() && + "GOT block should be pointer sized"); + assert(GOTBlock.edges_size() == 1 && + "GOT block should only have one outgoing edge"); + + auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); + JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); + JITTargetAddress TargetAddr = GOTTarget.getAddress(); + + int64_t Displacement = TargetAddr - EdgeAddr + 4; + if (Displacement >= std::numeric_limits::min() && + Displacement <= std::numeric_limits::max()) { + E.setTarget(GOTTarget); + LLVM_DEBUG({ + dbgs() << " Replaced stub branch with direct branch:\n "; + printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind())); + dbgs() << "\n"; + }); + } + } + return Error::success(); +} namespace llvm { namespace jitlink { @@ -35,7 +212,8 @@ class 
ELFLinkGraphBuilder_x86_64 { // Find a better way using SymbolTable = object::ELFFile::Elf_Shdr; // For now we just assume - std::map JITSymbolTable; + using SymbolMap = std::map; + SymbolMap JITSymbolTable; Section &getCommonSection() { if (!CommonSection) { @@ -51,6 +229,10 @@ class ELFLinkGraphBuilder_x86_64 { switch (Type) { case ELF::R_X86_64_PC32: return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32; + case ELF::R_X86_64_64: + return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer64; + case ELF::R_X86_64_GOTPCREL: + return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad; } return make_error("Unsupported x86-64 relocation:" + formatv("{0:d}", Type)); @@ -101,10 +283,6 @@ class ELFLinkGraphBuilder_x86_64 { for (auto SymRef : *Symbols) { Optional Name; - uint64_t Size = 0; - - // FIXME: Read size. - (void)Size; if (auto NameOrErr = SymRef.getName(*StringTable)) Name = *NameOrErr; @@ -120,7 +298,8 @@ class ELFLinkGraphBuilder_x86_64 { dbgs() << ": value = " << formatv("{0:x16}", SymRef.getValue()) << ", type = " << formatv("{0:x2}", SymRef.getType()) << ", binding = " << SymRef.getBinding() - << ", size =" << Size; + << ", size =" << SymRef.st_size + << ", info =" << SymRef.st_info; dbgs() << "\n"; }); } @@ -147,8 +326,8 @@ class ELFLinkGraphBuilder_x86_64 { uint64_t Flags = SecRef.sh_flags; uint64_t Alignment = SecRef.sh_addralign; const char *Data = nullptr; - // TODO: figure out what it is that has 0 size no name and address - // 0000-0000 + // for now we just use this to skip the "undefined" section, probably need + // to revist if (Size == 0) continue; @@ -229,13 +408,22 @@ class ELFLinkGraphBuilder_x86_64 { dbgs() << "Relocation Type: " << Type << "\n" << "Name: " << Obj.getRelocationTypeName(Type) << "\n"; }); - + auto SymbolIndex = Rela.getSymbol(false); auto Symbol = Obj.getRelocationSymbol(&Rela, &SymTab); if (!Symbol) return Symbol.takeError(); auto BlockToFix = *(JITSection->blocks().begin()); - auto TargetSymbol = 
JITSymbolTable[(*Symbol)->st_shndx]; + auto *TargetSymbol = JITSymbolTable[SymbolIndex]; + + if (!TargetSymbol) { + return make_error( + "Could not find symbol at given index, did you add it to " + "JITSymbolTable? index: " + + std::to_string((*Symbol)->st_shndx) + + " Size of table: " + std::to_string(JITSymbolTable.size()), + llvm::inconvertibleErrorCode()); + } uint64_t Addend = Rela.r_addend; JITTargetAddress FixupAddress = (*UpdateSection)->sh_addr + Rela.r_offset; @@ -251,8 +439,8 @@ class ELFLinkGraphBuilder_x86_64 { LLVM_DEBUG({ Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, Addend); - // TODO a mapping of KIND => type then call getRelocationTypeName4 - printEdge(dbgs(), *BlockToFix, GE, StringRef("")); + printEdge(dbgs(), *BlockToFix, GE, + getELFX86RelocationKindName(*Kind)); dbgs() << "\n"; }); BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(), @@ -299,10 +487,12 @@ class ELFLinkGraphBuilder_x86_64 { if (blocks.empty()) return make_error("Section has no block", llvm::inconvertibleErrorCode()); - + int SymbolIndex = -1; for (auto SymRef : *Symbols) { + ++SymbolIndex; auto Type = SymRef.getType(); - if (Type == ELF::STT_NOTYPE || Type == ELF::STT_FILE) + + if (Type == ELF::STT_FILE || SymbolIndex == 0) continue; // these should do it for now // if(Type != ELF::STT_NOTYPE && @@ -324,7 +514,8 @@ class ELFLinkGraphBuilder_x86_64 { bindings = {Linkage::Strong, Scope::Local}; if (SymRef.isDefined() && - (Type == ELF::STT_FUNC || Type == ELF::STT_OBJECT)) { + (Type == ELF::STT_FUNC || Type == ELF::STT_OBJECT || + Type == ELF::STT_SECTION)) { auto DefinedSection = Obj.getSection(SymRef.st_shndx); if (!DefinedSection) @@ -344,13 +535,19 @@ class ELFLinkGraphBuilder_x86_64 { auto B = *bs.begin(); LLVM_DEBUG({ dbgs() << " " << *Name << ": "; }); - + if (SymRef.getType() == ELF::STT_SECTION) + *Name = *sectName; auto &S = G->addDefinedSymbol( *B, SymRef.getValue(), *Name, SymRef.st_size, bindings.first, bindings.second, 
SymRef.getType() == ELF::STT_FUNC, false); - JITSymbolTable[SymRef.st_shndx] = &S; + JITSymbolTable[SymbolIndex] = &S; + } else if (SymRef.isUndefined() && SymRef.isExternal()) { + auto &S = G->addExternalSymbol(*Name, SymRef.st_size, bindings.first); + JITSymbolTable[SymbolIndex] = &S; } - //TODO: The following has to be implmented. + + // } + // TODO: The following has to be implmented. // leaving commented out to save time for future patchs /* G->addAbsoluteSymbol(*Name, SymRef.getValue(), SymRef.st_size, @@ -360,9 +557,6 @@ class ELFLinkGraphBuilder_x86_64 { G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, 0, SymRef.getValue(), false); } - - - //G->addExternalSymbol(*Name, SymRef.st_size, Linkage::Strong); */ } } @@ -413,7 +607,9 @@ class ELFJITLinker_x86_64 : public JITLinker { : JITLinker(std::move(Ctx), std::move(PassConfig)) {} private: - StringRef getEdgeKindName(Edge::Kind R) const override { return StringRef(); } + StringRef getEdgeKindName(Edge::Kind R) const override { + return getELFX86RelocationKindName(R); + } Expected> buildGraph(MemoryBufferRef ObjBuffer) override { @@ -430,16 +626,21 @@ class ELFJITLinker_x86_64 : public JITLinker { Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const { using namespace ELF_x86_64_Edges; + using namespace llvm::support; char *FixupPtr = BlockWorkingMem + E.getOffset(); JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { - - case ELFX86RelocationKind::PCRel32: + case ELFX86RelocationKind::PCRel32: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; - // verify - *(support::little32_t *)FixupPtr = Value; + endian::write32le(FixupPtr, Value); + break; + } + case ELFX86RelocationKind::Pointer64: { + int64_t Value = E.getTarget().getAddress() + E.getAddend(); + endian::write64le(FixupPtr, Value); break; } + } return Error::success(); } }; @@ -454,10 +655,30 @@ void jitLink_ELF_x86_64(std::unique_ptr Ctx) { else 
Config.PrePrunePasses.push_back(markAllSymbolsLive); + // Add an in-place GOT/Stubs pass. + Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error { + ELF_x86_64_GOTAndStubsBuilder(G).run(); + return Error::success(); + }); + + // Add GOT/Stubs optimizer pass. + Config.PostAllocationPasses.push_back(optimizeELF_x86_64_GOTAndStubs); + if (auto Err = Ctx->modifyPassConfig(TT, Config)) return Ctx->notifyFailed(std::move(Err)); ELFJITLinker_x86_64::link(std::move(Ctx), std::move(Config)); } +StringRef getELFX86RelocationKindName(Edge::Kind R) { + switch (R) { + case PCRel32: + return "PCRel32"; + case Pointer64: + return "Pointer64"; + case PCRel32GOTLoad: + return "PCRel32GOTLoad"; + } + return getGenericEdgeKindName(static_cast(R)); +} } // end namespace jitlink } // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index 463845a5b8cbd..28adf9b3fb718 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -148,10 +148,11 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { else return ToSymbolOrErr.takeError(); } else { - if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) - ToSymbol = &*ToSymbolOrErr; - else - return ToSymbolOrErr.takeError(); + auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1); + if (!ToSymbolSec) + return ToSymbolSec.takeError(); + ToSymbol = getSymbolByAddress(ToSymbolSec->Address); + assert(ToSymbol && "No symbol for section"); FixupValue -= ToSymbol->getAddress(); } diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index a91bc3b6033cf..54d725eac144a 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -150,10 +150,11 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { else return ToSymbolOrErr.takeError(); } 
else { - if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) - ToSymbol = &*ToSymbolOrErr; - else - return ToSymbolOrErr.takeError(); + auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1); + if (!ToSymbolSec) + return ToSymbolSec.takeError(); + ToSymbol = getSymbolByAddress(ToSymbolSec->Address); + assert(ToSymbol && "No symbol for section"); FixupValue -= ToSymbol->getAddress(); } diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index 473cf9299523c..f9d7924cd5e8c 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -22,9 +22,11 @@ add_llvm_component_library(LLVMOrcJIT OrcV2CBindings.cpp OrcMCJITReplacement.cpp RTDyldObjectLinkingLayer.cpp - ThreadSafeModule.cpp Speculation.cpp SpeculateAnalyses.cpp + TargetProcessControl.cpp + ThreadSafeModule.cpp + TPCIndirectionUtils.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index ff66955082d85..5e604130d6eab 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -17,13 +17,14 @@ namespace llvm { namespace orc { LazyCallThroughManager::LazyCallThroughManager( - ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr, - std::unique_ptr TP) - : ES(ES), ErrorHandlerAddr(ErrorHandlerAddr), TP(std::move(TP)) {} + ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP) + : ES(ES), ErrorHandlerAddr(ErrorHandlerAddr), TP(TP) {} Expected LazyCallThroughManager::getCallThroughTrampoline( JITDylib &SourceJD, SymbolStringPtr SymbolName, NotifyResolvedFunction NotifyResolved) { + assert(TP && "TrampolinePool not set"); + std::lock_guard Lock(LCTMMutex); auto Trampoline = TP->getTrampoline(); @@ -74,27 +75,31 @@ void LazyCallThroughManager::resolveTrampolineLandingAddress( if (!Entry) return 
NotifyLandingResolved(reportCallThroughError(Entry.takeError())); - ES.lookup( - LookupKind::Static, - makeJITDylibSearchOrder(Entry->SourceJD, - JITDylibLookupFlags::MatchAllSymbols), - SymbolLookupSet({Entry->SymbolName}), SymbolState::Ready, - [this, TrampolineAddr, SymbolName = Entry->SymbolName, - NotifyLandingResolved = std::move(NotifyLandingResolved)]( - Expected Result) mutable { - if (Result) { - assert(Result->size() == 1 && "Unexpected result size"); - assert(Result->count(SymbolName) && "Unexpected result value"); - JITTargetAddress LandingAddr = (*Result)[SymbolName].getAddress(); - - if (auto Err = notifyResolved(TrampolineAddr, LandingAddr)) - NotifyLandingResolved(reportCallThroughError(std::move(Err))); - else - NotifyLandingResolved(LandingAddr); - } else - NotifyLandingResolved(reportCallThroughError(Result.takeError())); - }, - NoDependenciesToRegister); + // Declaring SLS and the callback outside of the call to ES.lookup is a + // workaround to fix build failures on AIX and on z/OS platforms. 
+ SymbolLookupSet SLS({Entry->SymbolName}); + auto Callback = [this, TrampolineAddr, SymbolName = Entry->SymbolName, + NotifyLandingResolved = std::move(NotifyLandingResolved)]( + Expected Result) mutable { + if (Result) { + assert(Result->size() == 1 && "Unexpected result size"); + assert(Result->count(SymbolName) && "Unexpected result value"); + JITTargetAddress LandingAddr = (*Result)[SymbolName].getAddress(); + + if (auto Err = notifyResolved(TrampolineAddr, LandingAddr)) + NotifyLandingResolved(reportCallThroughError(std::move(Err))); + else + NotifyLandingResolved(LandingAddr); + } else { + NotifyLandingResolved(reportCallThroughError(Result.takeError())); + } + }; + + ES.lookup(LookupKind::Static, + makeJITDylibSearchOrder(Entry->SourceJD, + JITDylibLookupFlags::MatchAllSymbols), + std::move(SLS), SymbolState::Ready, std::move(Callback), + NoDependenciesToRegister); } Expected> diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index 8ed23de419d1e..18b3c5e12b1c2 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -7,13 +7,46 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "orc" + +using namespace llvm; + +template +bool stubAndPointerRangesOk(JITTargetAddress StubBlockAddr, + JITTargetAddress PointerBlockAddr, + unsigned NumStubs) { + constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement; + JITTargetAddress FirstStub = StubBlockAddr; + JITTargetAddress LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize); + JITTargetAddress FirstPointer = PointerBlockAddr; + JITTargetAddress LastPointer = + FirstPointer + ((NumStubs - 1) * ORCABI::StubSize); + + if (FirstStub < FirstPointer) { + if (LastStub >= 
FirstPointer) + return false; // Ranges overlap. + return (FirstPointer - FirstStub <= MaxDisp) && + (LastPointer - LastStub <= MaxDisp); // out-of-range. + } + + if (LastPointer >= FirstStub) + return false; // Ranges overlap. + + return (FirstStub - FirstPointer <= MaxDisp) && + (LastStub - LastPointer <= MaxDisp); +} namespace llvm { namespace orc { -void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcAArch64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { const uint32_t ResolverCode[] = { // resolver_entry: @@ -48,7 +81,7 @@ void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, 0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]! 0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]! 0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]! - 0x580004e0, // 0x07c: ldr x0, Lcallbackmgr + 0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr 0xaa1e03e1, // 0x080: mov x1, x30 0xd1003021, // 0x084: sub x1, x1, #12 0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr @@ -87,43 +120,47 @@ void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, 0xd65f0220, // 0x10c: ret x17 0x01234567, // 0x110: Lreentry_fn_ptr: 0xdeadbeef, // 0x114: .quad 0 - 0x98765432, // 0x118: Lcallbackmgr: + 0x98765432, // 0x118: Lreentry_ctx_ptr: 0xcafef00d // 0x11c: .quad 0 }; const unsigned ReentryFnAddrOffset = 0x110; - const unsigned CallbackMgrAddrOffset = 0x118; + const unsigned ReentryCtxAddrOffset = 0x118; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + 
ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); } -void OrcAArch64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, +void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); - memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *)); + memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so // subtract 32-bits. OffsetToPtr -= 4; - uint32_t *Trampolines = reinterpret_cast(TrampolineMem); + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30 Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr Trampolines[3 * I + 2] = 0xd63f0200; // blr x16 } - } -Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcAArch64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { // Stub format is: // // .section __orc_stubs @@ -144,68 +181,41 @@ Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - - // Allocate memory for stubs and pointers in one call. 
- std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); - - // Populate the stubs page stubs and mark it executable. - uint64_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrOffsetField = static_cast(NumPages * PageSize) - << 3; + static_assert(StubSize == PointerSize, + "Pointer and stub size must match for algorithm below"); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + uint64_t PtrDisplacement = + PointersBlockTargetAddress - StubsBlockTargetAddress; + uint64_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrOffsetField = PtrDisplacement << 3; for (unsigned I = 0; I < NumStubs; ++I) Stub[I] = 0xd61f020058000010 | PtrOffsetField; - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcX86_64_Base::writeTrampolines(uint8_t *TrampolineMem, - void *ResolverAddr, - unsigned NumTrampolines) { +void OrcX86_64_Base::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { unsigned OffsetToPtr = NumTrampolines * TrampolineSize; - memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *)); + memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); - uint64_t *Trampolines = reinterpret_cast(TrampolineMem); + uint64_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); uint64_t CallIndirPCRel = 0xf1c40000000015ff; for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); } -Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcX86_64_Base::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { // Stub format is: // // .section __orc_stubs @@ -226,52 +236,28 @@ Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - - // Allocate memory for stubs and pointers in one call. 
- std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); - // Populate the stubs page stubs and mark it executable. - uint64_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrOffsetField = static_cast(NumPages * PageSize - 6) - << 16; + static_assert(StubSize == PointerSize, + "Pointer and stub size must match for algorithm below"); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + uint64_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrOffsetField = + (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16; for (unsigned I = 0; I < NumStubs; ++I) Stub[I] = 0xF1C40000000025ff | PtrOffsetField; - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem, - JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + LLVM_DEBUG({ + dbgs() << "Writing resolver code to " + << formatv("{0:x16}", ResolverTargetAddress) << "\n"; + }); const uint8_t ResolverCode[] = { // resolver_entry: @@ -295,7 +281,7 @@ void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem, 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) 0x48, 0xbf, // 0x26: movabsq , %rdi - // 0x28: Callback manager addr. + // 0x28: JIT re-entry ctx addr. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi @@ -325,23 +311,26 @@ void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem, 0x58, // 0x69: popq %rax 0x5d, // 0x6a: popq %rbp 0xc3, // 0x6b: retq - }; + }; const unsigned ReentryFnAddrOffset = 0x3a; - const unsigned CallbackMgrAddrOffset = 0x28; + const unsigned ReentryCtxAddrOffset = 0x28; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); } -void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, - JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress 
ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { - // resolverCode is similar to OrcX86_64 with differences specific to windows x64 calling convention: - // arguments go into rcx, rdx and come in reverse order, shadow space allocation on stack + // resolverCode is similar to OrcX86_64 with differences specific to windows + // x64 calling convention: arguments go into rcx, rdx and come in reverse + // order, shadow space allocation on stack const uint8_t ResolverCode[] = { // resolver_entry: 0x55, // 0x00: pushq %rbp @@ -364,7 +353,7 @@ void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) 0x48, 0xb9, // 0x26: movabsq , %rcx - // 0x28: Callback manager addr. + // 0x28: JIT re-entry ctx addr. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8] @@ -402,18 +391,23 @@ void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, 0xc3, // 0x73: retq }; - const unsigned ReentryFnAddrOffset = 0x3a; - const unsigned CallbackMgrAddrOffset = 0x28; + const unsigned ReentryCtxAddrOffset = 0x28; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); } -void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcI386::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + assert((ReentryFnAddr >> 32) == 0 && "ReentryFnAddr out of range"); + assert((ReentryCtxAddr >> 32) == 
0 && "ReentryCtxAddr out of range"); const uint8_t ResolverCode[] = { // resolver_entry: @@ -451,29 +445,37 @@ void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, }; const unsigned ReentryFnAddrOffset = 0x2a; - const unsigned CallbackMgrAddrOffset = 0x25; + const unsigned ReentryCtxAddrOffset = 0x25; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint32_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint32_t)); } -void OrcI386::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, +void OrcI386::writeTrampolines(char *TrampolineWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { + assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); uint64_t CallRelImm = 0xF1C4C400000000e8; - uint64_t Resolver = reinterpret_cast(ResolverAddr); - uint64_t ResolverRel = - Resolver - reinterpret_cast(TrampolineMem) - 5; + uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5; - uint64_t *Trampolines = reinterpret_cast(TrampolineMem); + uint64_t *Trampolines = reinterpret_cast(TrampolineWorkingMem); for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) Trampolines[I] = CallRelImm | (ResolverRel << 8); } -Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal) { +void OrcI386::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + assert((StubsBlockTargetAddress >> 32) == 0 && + "StubsBlockTargetAddress is out of range"); + 
assert((PointersBlockTargetAddress >> 32) == 0 && + "PointersBlockTargetAddress is out of range"); + // Stub format is: // // .section __orc_stubs @@ -494,51 +496,21 @@ Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - - // Allocate memory for stubs and pointers in one call. - std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); - - // Populate the stubs page stubs and mark it executable. - uint64_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrAddr = reinterpret_cast(PtrsBlock.base()); + uint64_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4) Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16); - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem, - JITReentryFn ReentryFn, - void *CallbackMgr, bool isBigEndian) { +void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr, + bool isBigEndian) { const uint32_t ResolverCode[] = { // resolver_entry: @@ -570,9 +542,9 @@ void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem, 0xafbe0060, // 0x64: sw $fp,96($sp) 0xafbf0064, // 0x68: sw $ra,100($sp) - // Callback manager addr. - 0x00000000, // 0x6c: lui $a0,callbackmgr - 0x00000000, // 0x70: addiu $a0,$a0,callbackmgr + // JIT re-entry ctx addr. + 0x00000000, // 0x6c: lui $a0,ctx + 0x00000000, // 0x70: addiu $a0,$a0,ctx 0x03e02825, // 0x74: move $a1, $ra 0x24a5ffec, // 0x78: addiu $a1,$a1,-20 @@ -614,50 +586,63 @@ void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem, }; const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui - const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui + const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui const unsigned Offsett = 0xf8; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); // Depending on endian return value will be in v0 or v1. uint32_t MoveVxT9 = isBigEndian ? 
0x0060c825 : 0x0040c825; - memcpy(ResolverMem + Offsett, &MoveVxT9, sizeof(MoveVxT9)); - - uint64_t CallMgrAddr = reinterpret_cast(CallbackMgr); - uint32_t CallMgrLUi = 0x3c040000 | (((CallMgrAddr + 0x8000) >> 16) & 0xFFFF); - uint32_t CallMgrADDiu = 0x24840000 | ((CallMgrAddr) & 0xFFFF); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi)); - memcpy(ResolverMem + CallbackMgrAddrOffset + 4, &CallMgrADDiu, - sizeof(CallMgrADDiu)); - - uint64_t ReentryAddr = reinterpret_cast(ReentryFn); - uint32_t ReentryLUi = 0x3c190000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF); - uint32_t ReentryADDiu = 0x27390000 | ((ReentryAddr) & 0xFFFF); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi)); - memcpy(ResolverMem + ReentryFnAddrOffset + 4, &ReentryADDiu, - sizeof(ReentryADDiu)); + memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9)); + + uint32_t ReentryCtxLUi = + 0x3c040000 | (((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF); + uint32_t ReentryCtxADDiu = 0x24840000 | ((ReentryCtxAddr)&0xFFFF); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi, + sizeof(ReentryCtxLUi)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu, + sizeof(ReentryCtxADDiu)); + + uint32_t ReentryFnLUi = + 0x3c190000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF); + uint32_t ReentryFnADDiu = 0x27390000 | ((ReentryFnAddr)&0xFFFF); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi, + sizeof(ReentryFnLUi)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu, + sizeof(ReentryFnADDiu)); } -void OrcMips32_Base::writeTrampolines(uint8_t *TrampolineMem, - void *ResolverAddr, - unsigned NumTrampolines) { +void OrcMips32_Base::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { - uint32_t *Trampolines = reinterpret_cast(TrampolineMem); - uint64_t ResolveAddr = 
reinterpret_cast(ResolverAddr); - uint32_t RHiAddr = ((ResolveAddr + 0x8000) >> 16); + assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + uint32_t RHiAddr = ((ResolverAddr + 0x8000) >> 16); for (unsigned I = 0; I < NumTrampolines; ++I) { - Trampolines[5 * I + 0] = 0x03e0c025; // move $t8,$ra - Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); // lui $t9,resolveAddr - Trampolines[5 * I + 2] = 0x27390000 | (ResolveAddr & 0xFFFF); // addiu $t9,$t9,resolveAddr - Trampolines[5 * I + 3] = 0x0320f809; // jalr $t9 - Trampolines[5 * I + 4] = 0x00000000; // nop + // move $t8,$ra + // lui $t9,ResolverAddr + // addiu $t9,$t9,ResolverAddr + // jalr $t9 + // nop + Trampolines[5 * I + 0] = 0x03e0c025; + Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); + Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr & 0xFFFF); + Trampolines[5 * I + 3] = 0x0320f809; + Trampolines[5 * I + 4] = 0x00000000; } } -Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcMips32_Base::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + assert((StubsBlockTargetAddress >> 32) == 0 && + "InitialPtrVal is out of range"); + // Stub format is: // // .section __orc_stubs @@ -678,33 +663,15 @@ Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // ptr2: // .word 0x0 // - // ... - - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; + // i.. - // Allocate memory for stubs and pointers in one call. 
- std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); // Populate the stubs page stubs and mark it executable. - uint32_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrAddr = reinterpret_cast(Stub) + NumPages * PageSize; + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; for (unsigned I = 0; I < NumStubs; ++I) { uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16); @@ -714,26 +681,15 @@ Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, Stub[4 * I + 3] = 0x00000000; // nop PtrAddr += 4; } - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcMips64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { const uint32_t ResolverCode[] = { - //resolver_entry: + //resolver_entry: 0x67bdff30, // 0x00: daddiu $sp,$sp,-208 0xffa20000, // 0x04: sd v0,0(sp) 0xffa30008, // 0x08: sd v1,8(sp) @@ -762,13 +718,13 @@ void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, 0xffbe00c0, // 0x64: sd fp,192(sp) 0xffbf00c8, // 0x68: sd ra,200(sp) - // Callback manager addr. - 0x00000000, // 0x6c: lui $a0,heighest(callbackmgr) - 0x00000000, // 0x70: daddiu $a0,$a0,heigher(callbackmgr) + // JIT re-entry ctx addr. 
+ 0x00000000, // 0x6c: lui $a0,heighest(ctx) + 0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx) 0x00000000, // 0x74: dsll $a0,$a0,16 - 0x00000000, // 0x78: daddiu $a0,$a0,hi(callbackmgr) + 0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx) 0x00000000, // 0x7c: dsll $a0,$a0,16 - 0x00000000, // 0x80: daddiu $a0,$a0,lo(callbackmgr) + 0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx) 0x03e02825, // 0x84: move $a1, $ra 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36 @@ -814,73 +770,73 @@ void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, }; const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui - const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui - - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - - uint64_t CallMgrAddr = reinterpret_cast(CallbackMgr); - - uint32_t CallMgrLUi = - 0x3c040000 | (((CallMgrAddr + 0x800080008000) >> 48) & 0xFFFF); - uint32_t CallMgrDADDiu = - 0x64840000 | (((CallMgrAddr + 0x80008000) >> 32) & 0xFFFF); - uint32_t CallMgrDSLL = 0x00042438; - uint32_t CallMgrDADDiu2 = - 0x64840000 | ((((CallMgrAddr + 0x8000) >> 16) & 0xFFFF)); - uint32_t CallMgrDSLL2 = 0x00042438; - uint32_t CallMgrDADDiu3 = 0x64840000 | ((CallMgrAddr)&0xFFFF); - - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 4), &CallMgrDADDiu, - sizeof(CallMgrDADDiu)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 8), &CallMgrDSLL, - sizeof(CallMgrDSLL)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 12), &CallMgrDADDiu2, - sizeof(CallMgrDADDiu2)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 16), &CallMgrDSLL2, - sizeof(CallMgrDSLL2)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 20), &CallMgrDADDiu3, - sizeof(CallMgrDADDiu3)); - - uint64_t ReentryAddr = reinterpret_cast(ReentryFn); - - uint32_t ReentryLUi = - 0x3c190000 | (((ReentryAddr + 0x800080008000) >> 48) & 0xFFFF); - - uint32_t ReentryDADDiu = - 0x67390000 | (((ReentryAddr + 0x80008000) 
>> 32) & 0xFFFF); - - uint32_t ReentryDSLL = 0x0019cc38; - - uint32_t ReentryDADDiu2 = - 0x67390000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF); - - uint32_t ReentryDSLL2 = 0x0019cc38; - - uint32_t ReentryDADDiu3 = 0x67390000 | ((ReentryAddr)&0xFFFF); - - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 4), &ReentryDADDiu, - sizeof(ReentryDADDiu)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 8), &ReentryDSLL, - sizeof(ReentryDSLL)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 12), &ReentryDADDiu2, - sizeof(ReentryDADDiu2)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 16), &ReentryDSLL2, - sizeof(ReentryDSLL2)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 20), &ReentryDADDiu3, - sizeof(ReentryDADDiu3)); + const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + + uint32_t ReentryCtxLUi = + 0x3c040000 | (((ReentryCtxAddr + 0x800080008000) >> 48) & 0xFFFF); + uint32_t ReentryCtxDADDiu = + 0x64840000 | (((ReentryCtxAddr + 0x80008000) >> 32) & 0xFFFF); + uint32_t ReentryCtxDSLL = 0x00042438; + uint32_t ReentryCtxDADDiu2 = + 0x64840000 | ((((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF)); + uint32_t ReentryCtxDSLL2 = 0x00042438; + uint32_t ReentryCtxDADDiu3 = 0x64840000 | ((ReentryCtxAddr)&0xFFFF); + + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi, + sizeof(ReentryCtxLUi)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxDADDiu, + sizeof(ReentryCtxDADDiu)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxDSLL, + sizeof(ReentryCtxDSLL)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxDADDiu2, + sizeof(ReentryCtxDADDiu2)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 16), &ReentryCtxDSLL2, + sizeof(ReentryCtxDSLL2)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 20), &ReentryCtxDADDiu3, + 
sizeof(ReentryCtxDADDiu3)); + + uint32_t ReentryFnLUi = + 0x3c190000 | (((ReentryFnAddr + 0x800080008000) >> 48) & 0xFFFF); + + uint32_t ReentryFnDADDiu = + 0x67390000 | (((ReentryFnAddr + 0x80008000) >> 32) & 0xFFFF); + + uint32_t ReentryFnDSLL = 0x0019cc38; + + uint32_t ReentryFnDADDiu2 = + 0x67390000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF); + + uint32_t ReentryFnDSLL2 = 0x0019cc38; + + uint32_t ReentryFnDADDiu3 = 0x67390000 | ((ReentryFnAddr)&0xFFFF); + + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi, + sizeof(ReentryFnLUi)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryFnDADDiu, + sizeof(ReentryFnDADDiu)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryFnDSLL, + sizeof(ReentryFnDSLL)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryFnDADDiu2, + sizeof(ReentryFnDADDiu2)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 16), &ReentryFnDSLL2, + sizeof(ReentryFnDSLL2)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 20), &ReentryFnDADDiu3, + sizeof(ReentryFnDADDiu3)); } -void OrcMips64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, +void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { - uint32_t *Trampolines = reinterpret_cast(TrampolineMem); - uint64_t ResolveAddr = reinterpret_cast(ResolverAddr); + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); - uint64_t HeighestAddr = ((ResolveAddr + 0x800080008000) >> 48); - uint64_t HeigherAddr = ((ResolveAddr + 0x80008000) >> 32); - uint64_t HiAddr = ((ResolveAddr + 0x8000) >> 16); + uint64_t HeighestAddr = ((ResolverAddr + 0x800080008000) >> 48); + uint64_t HeigherAddr = ((ResolverAddr + 0x80008000) >> 32); + uint64_t HiAddr = ((ResolverAddr + 0x8000) >> 16); for (unsigned I = 0; I < NumTrampolines; ++I) { Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra @@ -889,16 
+845,17 @@ void OrcMips64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16 Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16 - Trampolines[10 * I + 6] = 0x67390000 | (ResolveAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr) + Trampolines[10 * I + 6] = + 0x67390000 | (ResolverAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr) Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9 Trampolines[10 * I + 8] = 0x00000000; // nop Trampolines[10 * I + 9] = 0x00000000; // nop } } -Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcMips64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { // Stub format is: // // .section __orc_stubs @@ -926,31 +883,14 @@ Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // .dword 0x0 // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - // Allocate memory for stubs and pointers in one call. - std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. 
- sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); // Populate the stubs page stubs and mark it executable. - uint32_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrAddr = reinterpret_cast(PtrsBlock.base()); + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48); @@ -965,19 +905,6 @@ Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, Stub[8 * I + 6] = 0x03200008; // jr $t9 Stub[8 * I + 7] = 0x00000000; // nop } - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. - void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } } // End namespace orc. } // End namespace llvm. 
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 21925726072e3..7888c2fcbdbd9 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -18,7 +18,7 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { public: JITDylibSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {} - void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) { + void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override { auto &ES = MR.getTargetJITDylib().getExecutionSession(); SymbolLookupSet InternedSymbols; @@ -55,7 +55,7 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { RegisterDependencies); } - Expected getResponsibilitySet(const LookupSet &Symbols) { + Expected getResponsibilitySet(const LookupSet &Symbols) override { LookupSet Result; for (auto &KV : MR.getSymbols()) { diff --git a/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp new file mode 100644 index 0000000000000..150040cd11e5d --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp @@ -0,0 +1,425 @@ +//===------ TargetProcessControl.cpp -- Target process control APIs -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h" + +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; +using namespace llvm::orc; + +namespace llvm { +namespace orc { + +class TPCIndirectionUtilsAccess { +public: + using IndirectStubInfo = TPCIndirectionUtils::IndirectStubInfo; + using IndirectStubInfoVector = TPCIndirectionUtils::IndirectStubInfoVector; + + static Expected + getIndirectStubs(TPCIndirectionUtils &TPCIU, unsigned NumStubs) { + return TPCIU.getIndirectStubs(NumStubs); + }; +}; + +} // end namespace orc +} // end namespace llvm + +namespace { + +class TPCTrampolinePool : public TrampolinePool { +public: + TPCTrampolinePool(TPCIndirectionUtils &TPCIU); + Error deallocatePool(); + Expected getTrampoline() override; + void releaseTrampoline(JITTargetAddress TrampolineAddr); + +protected: + Error grow(); + + using Allocation = jitlink::JITLinkMemoryManager::Allocation; + + std::mutex TPMutex; + TPCIndirectionUtils &TPCIU; + unsigned TrampolineSize = 0; + unsigned TrampolinesPerPage = 0; + std::vector> TrampolineBlocks; + std::vector AvailableTrampolines; +}; + +class TPCIndirectStubsManager : public IndirectStubsManager, + private TPCIndirectionUtilsAccess { +public: + TPCIndirectStubsManager(TPCIndirectionUtils &TPCIU) : TPCIU(TPCIU) {} + + Error deallocateStubs(); + + Error createStub(StringRef StubName, JITTargetAddress StubAddr, + JITSymbolFlags StubFlags) override; + + Error createStubs(const StubInitsMap &StubInits) override; + + JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override; + + JITEvaluatedSymbol findPointer(StringRef Name) override; + + Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override; + +private: + using StubInfo = std::pair; + + std::mutex ISMMutex; + 
TPCIndirectionUtils &TPCIU; + StringMap StubInfos; +}; + +TPCTrampolinePool::TPCTrampolinePool(TPCIndirectionUtils &TPCIU) + : TPCIU(TPCIU) { + auto &TPC = TPCIU.getTargetProcessControl(); + auto &ABI = TPCIU.getABISupport(); + + TrampolineSize = ABI.getTrampolineSize(); + TrampolinesPerPage = + (TPC.getPageSize() - ABI.getPointerSize()) / TrampolineSize; +} + +Error TPCTrampolinePool::deallocatePool() { + Error Err = Error::success(); + for (auto &Alloc : TrampolineBlocks) + Err = joinErrors(std::move(Err), Alloc->deallocate()); + return Err; +} + +Expected TPCTrampolinePool::getTrampoline() { + std::lock_guard Lock(TPMutex); + if (AvailableTrampolines.empty()) { + if (auto Err = grow()) + return std::move(Err); + } + + assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool"); + auto TrampolineAddr = AvailableTrampolines.back(); + AvailableTrampolines.pop_back(); + return TrampolineAddr; +} + +void TPCTrampolinePool::releaseTrampoline(JITTargetAddress TrampolineAddr) { + std::lock_guard Lock(TPMutex); + AvailableTrampolines.push_back(TrampolineAddr); +} + +Error TPCTrampolinePool::grow() { + assert(this->AvailableTrampolines.empty() && + "Grow called with trampolines still available"); + + auto ResolverAddress = TPCIU.getResolverBlockAddress(); + assert(ResolverAddress && "Resolver address can not be null"); + + auto &TPC = TPCIU.getTargetProcessControl(); + constexpr auto TrampolinePagePermissions = + static_cast(sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + auto PageSize = TPC.getPageSize(); + jitlink::JITLinkMemoryManager::SegmentsRequestMap Request; + Request[TrampolinePagePermissions] = {PageSize, static_cast(PageSize), + 0}; + auto Alloc = TPC.getMemMgr().allocate(Request); + + if (!Alloc) + return Alloc.takeError(); + + unsigned NumTrampolines = TrampolinesPerPage; + + auto WorkingMemory = (*Alloc)->getWorkingMemory(TrampolinePagePermissions); + auto TargetAddress = (*Alloc)->getTargetMemory(TrampolinePagePermissions); + + 
TPCIU.getABISupport().writeTrampolines(WorkingMemory.data(), TargetAddress, + ResolverAddress, NumTrampolines); + + auto TargetAddr = (*Alloc)->getTargetMemory(TrampolinePagePermissions); + for (unsigned I = 0; I < NumTrampolines; ++I) + this->AvailableTrampolines.push_back(TargetAddr + (I * TrampolineSize)); + + if (auto Err = (*Alloc)->finalize()) + return Err; + + TrampolineBlocks.push_back(std::move(*Alloc)); + + return Error::success(); +} + +Error TPCIndirectStubsManager::createStub(StringRef StubName, + JITTargetAddress StubAddr, + JITSymbolFlags StubFlags) { + StubInitsMap SIM; + SIM[StubName] = std::make_pair(StubAddr, StubFlags); + return createStubs(SIM); +} + +Error TPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) { + auto AvailableStubInfos = getIndirectStubs(TPCIU, StubInits.size()); + if (!AvailableStubInfos) + return AvailableStubInfos.takeError(); + + { + std::lock_guard Lock(ISMMutex); + unsigned ASIdx = 0; + for (auto &SI : StubInits) { + auto &A = (*AvailableStubInfos)[ASIdx++]; + StubInfos[SI.first()] = std::make_pair(A, SI.second.second); + } + } + + auto &MemAccess = TPCIU.getTargetProcessControl().getMemoryAccess(); + switch (TPCIU.getABISupport().getPointerSize()) { + case 4: { + unsigned ASIdx = 0; + std::vector PtrUpdates; + for (auto &SI : StubInits) + PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress, + static_cast(SI.second.first)}); + return MemAccess.writeUInt32s(PtrUpdates); + } + case 8: { + unsigned ASIdx = 0; + std::vector PtrUpdates; + for (auto &SI : StubInits) + PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress, + static_cast(SI.second.first)}); + return MemAccess.writeUInt64s(PtrUpdates); + } + default: + return make_error("Unsupported pointer size", + inconvertibleErrorCode()); + } +} + +JITEvaluatedSymbol TPCIndirectStubsManager::findStub(StringRef Name, + bool ExportedStubsOnly) { + std::lock_guard Lock(ISMMutex); + auto I = StubInfos.find(Name); + if (I == 
StubInfos.end()) + return nullptr; + return {I->second.first.StubAddress, I->second.second}; +} + +JITEvaluatedSymbol TPCIndirectStubsManager::findPointer(StringRef Name) { + std::lock_guard Lock(ISMMutex); + auto I = StubInfos.find(Name); + if (I == StubInfos.end()) + return nullptr; + return {I->second.first.PointerAddress, I->second.second}; +} + +Error TPCIndirectStubsManager::updatePointer(StringRef Name, + JITTargetAddress NewAddr) { + + JITTargetAddress PtrAddr = 0; + { + std::lock_guard Lock(ISMMutex); + auto I = StubInfos.find(Name); + if (I == StubInfos.end()) + return make_error("Unknown stub name", + inconvertibleErrorCode()); + PtrAddr = I->second.first.PointerAddress; + } + + auto &MemAccess = TPCIU.getTargetProcessControl().getMemoryAccess(); + switch (TPCIU.getABISupport().getPointerSize()) { + case 4: { + TargetProcessControl::MemoryAccess::UInt32Write PUpdate(PtrAddr, NewAddr); + return MemAccess.writeUInt32s(PUpdate); + } + case 8: { + TargetProcessControl::MemoryAccess::UInt64Write PUpdate(PtrAddr, NewAddr); + return MemAccess.writeUInt64s(PUpdate); + } + default: + return make_error("Unsupported pointer size", + inconvertibleErrorCode()); + } +} + +} // end anonymous namespace. 
+ +namespace llvm { +namespace orc { + +TPCIndirectionUtils::ABISupport::~ABISupport() {} + +Expected> +TPCIndirectionUtils::Create(TargetProcessControl &TPC) { + const auto &TT = TPC.getTargetTriple(); + switch (TT.getArch()) { + default: + return make_error( + std::string("No TPCIndirectionUtils available for ") + TT.str(), + inconvertibleErrorCode()); + case Triple::aarch64: + case Triple::aarch64_32: + return CreateWithABI(TPC); + + case Triple::x86: + return CreateWithABI(TPC); + + case Triple::mips: + return CreateWithABI(TPC); + + case Triple::mipsel: + return CreateWithABI(TPC); + + case Triple::mips64: + case Triple::mips64el: + return CreateWithABI(TPC); + + case Triple::x86_64: + if (TT.getOS() == Triple::OSType::Win32) + return CreateWithABI(TPC); + else + return CreateWithABI(TPC); + } +} + +Error TPCIndirectionUtils::cleanup() { + Error Err = Error::success(); + + for (auto &A : IndirectStubAllocs) + Err = joinErrors(std::move(Err), A->deallocate()); + + if (TP) + Err = joinErrors(std::move(Err), + static_cast(*TP).deallocatePool()); + + if (ResolverBlock) + Err = joinErrors(std::move(Err), ResolverBlock->deallocate()); + + return Err; +} + +Expected +TPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + assert(ABI && "ABI can not be null"); + constexpr auto ResolverBlockPermissions = + static_cast(sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + auto ResolverSize = ABI->getResolverCodeSize(); + + jitlink::JITLinkMemoryManager::SegmentsRequestMap Request; + Request[ResolverBlockPermissions] = {TPC.getPageSize(), + static_cast(ResolverSize), 0}; + auto Alloc = TPC.getMemMgr().allocate(Request); + if (!Alloc) + return Alloc.takeError(); + + auto WorkingMemory = (*Alloc)->getWorkingMemory(ResolverBlockPermissions); + auto TargetAddress = (*Alloc)->getTargetMemory(ResolverBlockPermissions); + ABI->writeResolverCode(WorkingMemory.data(), TargetAddress, ReentryFnAddr, + ReentryCtxAddr); + + if (auto 
Err = (*Alloc)->finalize()) + return std::move(Err); + + ResolverBlock = std::move(*Alloc); + ResolverBlockAddr = ResolverBlock->getTargetMemory(ResolverBlockPermissions); + return ResolverBlockAddr; +} + +std::unique_ptr +TPCIndirectionUtils::createIndirectStubsManager() { + return std::make_unique(*this); +} + +TrampolinePool &TPCIndirectionUtils::getTrampolinePool() { + if (!TP) + TP = std::make_unique(*this); + return *TP; +} + +LazyCallThroughManager &TPCIndirectionUtils::createLazyCallThroughManager( + ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) { + assert(!LCTM && + "createLazyCallThroughManager can not have been called before"); + LCTM = std::make_unique(ES, ErrorHandlerAddr, + &getTrampolinePool()); + return *LCTM; +} + +TPCIndirectionUtils::TPCIndirectionUtils(TargetProcessControl &TPC, + std::unique_ptr ABI) + : TPC(TPC), ABI(std::move(ABI)) { + assert(this->ABI && "ABI can not be null"); + + assert(TPC.getPageSize() > getABISupport().getStubSize() && + "Stubs larger than one page are not supported"); +} + +Expected +TPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { + + std::lock_guard Lock(TPCUIMutex); + + // If there aren't enough stubs available then allocate some more. 
+ if (NumStubs > AvailableIndirectStubs.size()) { + auto NumStubsToAllocate = NumStubs; + auto PageSize = TPC.getPageSize(); + auto StubBytes = alignTo(NumStubsToAllocate * ABI->getStubSize(), PageSize); + NumStubsToAllocate = StubBytes / ABI->getStubSize(); + auto PointerBytes = + alignTo(NumStubsToAllocate * ABI->getPointerSize(), PageSize); + + constexpr auto StubPagePermissions = + static_cast(sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + constexpr auto PointerPagePermissions = + static_cast(sys::Memory::MF_READ | + sys::Memory::MF_WRITE); + + jitlink::JITLinkMemoryManager::SegmentsRequestMap Request; + Request[StubPagePermissions] = {PageSize, static_cast(StubBytes), + 0}; + Request[PointerPagePermissions] = {PageSize, 0, PointerBytes}; + auto Alloc = TPC.getMemMgr().allocate(Request); + if (!Alloc) + return Alloc.takeError(); + + auto StubTargetAddr = (*Alloc)->getTargetMemory(StubPagePermissions); + auto PointerTargetAddr = (*Alloc)->getTargetMemory(PointerPagePermissions); + + ABI->writeIndirectStubsBlock( + (*Alloc)->getWorkingMemory(StubPagePermissions).data(), StubTargetAddr, + PointerTargetAddr, NumStubsToAllocate); + + if (auto Err = (*Alloc)->finalize()) + return std::move(Err); + + for (unsigned I = 0; I != NumStubsToAllocate; ++I) { + AvailableIndirectStubs.push_back( + IndirectStubInfo(StubTargetAddr, PointerTargetAddr)); + StubTargetAddr += ABI->getStubSize(); + PointerTargetAddr += ABI->getPointerSize(); + } + + IndirectStubAllocs.push_back(std::move(*Alloc)); + } + + assert(NumStubs <= AvailableIndirectStubs.size() && + "Sufficient stubs should have been allocated above"); + + IndirectStubInfoVector Result; + while (NumStubs--) { + Result.push_back(AvailableIndirectStubs.back()); + AvailableIndirectStubs.pop_back(); + } + + return std::move(Result); +} + +} // end namespace orc +} // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp new file mode 
100644 index 0000000000000..833b597fe712a --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp @@ -0,0 +1,79 @@ +//===------ TargetProcessControl.cpp -- Target process control APIs -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/Process.h" + +#include + +namespace llvm { +namespace orc { + +TargetProcessControl::MemoryAccess::~MemoryAccess() {} + +TargetProcessControl::TargetProcessControl(Triple TT, unsigned PageSize) + : TT(std::move(TT)), PageSize(PageSize) {} + +TargetProcessControl::~TargetProcessControl() {} + +SelfTargetProcessControl::SelfTargetProcessControl(Triple TT, unsigned PageSize) + : TargetProcessControl(std::move(TT), PageSize) { + this->MemMgr = IPMM.get(); + this->MemAccess = this; +} + +Expected> +SelfTargetProcessControl::Create() { + auto PageSize = sys::Process::getPageSize(); + if (!PageSize) + return PageSize.takeError(); + + Triple TT(sys::getProcessTriple()); + + return std::make_unique(std::move(TT), *PageSize); +} + +void SelfTargetProcessControl::writeUInt8s(ArrayRef Ws, + WriteResultFn OnWriteComplete) { + for (auto &W : Ws) + *jitTargetAddressToPointer(W.Address) = W.Value; + OnWriteComplete(Error::success()); +} + +void SelfTargetProcessControl::writeUInt16s(ArrayRef Ws, + WriteResultFn OnWriteComplete) { + for (auto &W : Ws) + *jitTargetAddressToPointer(W.Address) = W.Value; + OnWriteComplete(Error::success()); +} + +void SelfTargetProcessControl::writeUInt32s(ArrayRef Ws, + WriteResultFn OnWriteComplete) { + for (auto &W : Ws) + *jitTargetAddressToPointer(W.Address) = W.Value; + OnWriteComplete(Error::success()); +} + +void 
SelfTargetProcessControl::writeUInt64s(ArrayRef Ws, + WriteResultFn OnWriteComplete) { + for (auto &W : Ws) + *jitTargetAddressToPointer(W.Address) = W.Value; + OnWriteComplete(Error::success()); +} + +void SelfTargetProcessControl::writeBuffers(ArrayRef Ws, + WriteResultFn OnWriteComplete) { + for (auto &W : Ws) + memcpy(jitTargetAddressToPointer(W.Address), W.Buffer.data(), + W.Buffer.size()); + OnWriteComplete(Error::success()); +} + +} // end namespace orc +} // end namespace llvm diff --git a/llvm/lib/Frontend/CMakeLists.txt b/llvm/lib/Frontend/CMakeLists.txt index 9730c8414edff..ea66917b8936a 100644 --- a/llvm/lib/Frontend/CMakeLists.txt +++ b/llvm/lib/Frontend/CMakeLists.txt @@ -1 +1,2 @@ +add_subdirectory(OpenACC) add_subdirectory(OpenMP) diff --git a/llvm/lib/Frontend/OpenACC/CMakeLists.txt b/llvm/lib/Frontend/OpenACC/CMakeLists.txt new file mode 100644 index 0000000000000..ba340ab9c5619 --- /dev/null +++ b/llvm/lib/Frontend/OpenACC/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend/OpenACC/ACC.td) +tablegen(LLVM ACC.cpp --gen-directive-impl) +add_public_tablegen_target(acc_cpp) + +add_llvm_component_library(LLVMFrontendOpenACC + ACC.cpp # Generated by tablegen above + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend/OpenACC + + DEPENDS + acc_gen + acc_cpp +) + +target_link_libraries(LLVMFrontendOpenACC LLVMSupport) + diff --git a/llvm/lib/Frontend/OpenMP/CMakeLists.txt b/llvm/lib/Frontend/OpenMP/CMakeLists.txt index f88e3ed986623..068283fd82e07 100644 --- a/llvm/lib/Frontend/OpenMP/CMakeLists.txt +++ b/llvm/lib/Frontend/OpenMP/CMakeLists.txt @@ -15,4 +15,4 @@ add_llvm_component_library(LLVMFrontendOpenMP intrinsics_gen omp_gen omp_cpp - ) \ No newline at end of file + ) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index b7212edab6ab2..9468a3aa3c8dd 100644 --- 
a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -127,13 +127,16 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { void OpenMPIRBuilder::initialize() { initializeTypes(M); } void OpenMPIRBuilder::finalize() { + SmallPtrSet ParallelRegionBlockSet; + SmallVector Blocks; for (OutlineInfo &OI : OutlineInfos) { - assert(!OI.Blocks.empty() && - "Outlined regions should have at least a single block!"); - BasicBlock *RegEntryBB = OI.Blocks.front(); - Function *OuterFn = RegEntryBB->getParent(); + ParallelRegionBlockSet.clear(); + Blocks.clear(); + OI.collectBlocks(ParallelRegionBlockSet, Blocks); + + Function *OuterFn = OI.EntryBB->getParent(); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -143,6 +146,8 @@ void OpenMPIRBuilder::finalize() { /* Suffix */ ".omp_par"); LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() + << " Exit: " << OI.ExitBB->getName() << "\n"); assert(Extractor.isEligible() && "Expected OpenMP outlining to be possible!"); @@ -162,12 +167,12 @@ void OpenMPIRBuilder::finalize() { // made our own entry block after all. 
{ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB); - assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry); - RegEntryBB->moveBefore(&ArtificialEntry); + assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); + assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); + OI.EntryBB->moveBefore(&ArtificialEntry); ArtificialEntry.eraseFromParent(); } - assert(&OutlinedFn->getEntryBlock() == RegEntryBB); + assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); assert(OutlinedFn && OutlinedFn->getNumUses() == 1); // Run a user callback, e.g. to add attributes. @@ -614,20 +619,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); - SmallPtrSet ParallelRegionBlockSet; - SmallVector Worklist; - ParallelRegionBlockSet.insert(PRegEntryBB); - ParallelRegionBlockSet.insert(PRegExitBB); + OI.EntryBB = PRegEntryBB; + OI.ExitBB = PRegExitBB; - // Collect all blocks in-between PRegEntryBB and PRegExitBB. - Worklist.push_back(PRegEntryBB); - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - OI.Blocks.push_back(BB); - for (BasicBlock *SuccBB : successors(BB)) - if (ParallelRegionBlockSet.insert(SuccBB).second) - Worklist.push_back(SuccBB); - } + SmallPtrSet ParallelRegionBlockSet; + SmallVector Blocks; + OI.collectBlocks(ParallelRegionBlockSet, Blocks); // Ensure a single exit node for the outlined region by creating one. 
// We might have multiple incoming edges to the exit now due to finalizations, @@ -635,10 +632,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( BasicBlock *PRegOutlinedExitBB = PRegExitBB; PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); PRegOutlinedExitBB->setName("omp.par.outlined.exit"); - OI.Blocks.push_back(PRegOutlinedExitBB); + Blocks.push_back(PRegOutlinedExitBB); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -694,7 +691,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : OI.Blocks) + for (auto *BB : Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); @@ -1112,3 +1109,20 @@ void OpenMPIRBuilder::initializeTypes(Module &M) { VarName##Ptr = PointerType::getUnqual(T); #include "llvm/Frontend/OpenMP/OMPKinds.def" } + +void OpenMPIRBuilder::OutlineInfo::collectBlocks( + SmallPtrSetImpl &BlockSet, + SmallVectorImpl &BlockVector) { + SmallVector Worklist; + BlockSet.insert(EntryBB); + BlockSet.insert(ExitBB); + + Worklist.push_back(EntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + BlockVector.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (BlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } +} diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index d408d7a4705b4..fd08310316b3a 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2861,6 +2861,8 @@ static const char *getWholeProgDevirtResByArgKindName( static const char *getTTResKindName(TypeTestResolution::Kind K) { switch (K) { + case TypeTestResolution::Unknown: + return "unknown"; case TypeTestResolution::Unsat: return "unsat"; case 
TypeTestResolution::ByteArray: diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index f67d96a854f4d..8bf4e82357c69 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -925,14 +925,13 @@ MaybeAlign AttributeSetNode::getStackAlignment() const { Type *AttributeSetNode::getByValType() const { if (auto A = findEnumAttribute(Attribute::ByVal)) return A->getValueAsType(); - return 0; + return nullptr; } Type *AttributeSetNode::getPreallocatedType() const { - for (const auto &I : *this) - if (I.hasAttribute(Attribute::Preallocated)) - return I.getValueAsType(); - return 0; + if (auto A = findEnumAttribute(Attribute::Preallocated)) + return A->getValueAsType(); + return nullptr; } uint64_t AttributeSetNode::getDereferenceableBytes() const { @@ -970,7 +969,7 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { /// Map from AttributeList index to the internal array index. Adding one happens /// to work, because -1 wraps around to 0. -static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { +static unsigned attrIdxToArrayIdx(unsigned Index) { return Index + 1; } @@ -983,9 +982,7 @@ AttributeListImpl::AttributeListImpl(ArrayRef Sets) // Initialize AvailableFunctionAttrs and AvailableSomewhereAttrs // summary bitsets. 
- static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U, - "function should be stored in slot 0"); - for (const auto &I : Sets[0]) + for (const auto &I : Sets[attrIdxToArrayIdx(AttributeList::FunctionIndex)]) if (!I.isStringAttribute()) AvailableFunctionAttrs.addAttribute(I.getKindAsEnum()); diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index f02246cda7fc6..f3c3e9ad9f696 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -779,30 +779,10 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, if (isa(V1)) return V1; return V2; } - + if (isa(V1)) return V2; + if (isa(V2)) return V1; if (V1 == V2) return V1; - // If the true or false value is undef, we can fold to the other value as - // long as the other value isn't poison. - auto NotPoison = [](Constant *C) { - // TODO: We can analyze ConstExpr by opcode to determine if there is any - // possibility of poison. - if (isa(C)) - return false; - - if (isa(C) || isa(C) || isa(C) || - isa(C) || isa(C)) - return true; - - if (C->getType()->isVectorTy()) - return !C->containsUndefElement() && !C->containsConstantExpression(); - - // TODO: Recursively analyze aggregates or other constants. 
- return false; - }; - if (isa(V1) && NotPoison(V2)) return V2; - if (isa(V2) && NotPoison(V1)) return V1; - if (ConstantExpr *TrueVal = dyn_cast(V1)) { if (TrueVal->getOpcode() == Instruction::Select) if (TrueVal->getOperand(0) == Cond) diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 0ec0cce83a8c5..8db2389ef5428 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -120,7 +121,7 @@ bool Argument::hasPreallocatedAttr() const { return hasAttribute(Attribute::Preallocated); } -bool Argument::hasPassPointeeByValueAttr() const { +bool Argument::hasPassPointeeByValueCopyAttr() const { if (!getType()->isPointerTy()) return false; AttributeList Attrs = getParent()->getAttributes(); return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) || @@ -1484,12 +1485,21 @@ Optional Intrinsic::remangleIntrinsicFunction(Function *F) { } /// hasAddressTaken - returns true if there are any uses of this function -/// other than direct calls or invokes to it. -bool Function::hasAddressTaken(const User* *PutOffender) const { +/// other than direct calls or invokes to it. Optionally ignores callback +/// uses. 
+bool Function::hasAddressTaken(const User **PutOffender, + bool IgnoreCallbackUses) const { for (const Use &U : uses()) { const User *FU = U.getUser(); if (isa(FU)) continue; + + if (IgnoreCallbackUses) { + AbstractCallSite ACS(&U); + if (ACS && ACS.isCallbackCall()) + continue; + } + const auto *Call = dyn_cast(FU); if (!Call) { if (PutOffender) diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index f650ad9130ac5..2f17a0d73af40 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1262,11 +1262,15 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { } static Align computeAllocaDefaultAlign(Type *Ty, BasicBlock *BB) { + assert(BB && "Insertion BB cannot be null when alignment not provided!"); + assert(BB->getParent() && + "BB must be in a Function when alignment not provided!"); const DataLayout &DL = BB->getModule()->getDataLayout(); return DL.getPrefTypeAlign(Ty); } static Align computeAllocaDefaultAlign(Type *Ty, Instruction *I) { + assert(I && "Insertion position cannot be null when alignment not provided!"); return computeAllocaDefaultAlign(Ty, I->getParent()); } @@ -1342,11 +1346,15 @@ void LoadInst::AssertOK() { } static Align computeLoadStoreDefaultAlign(Type *Ty, BasicBlock *BB) { + assert(BB && "Insertion BB cannot be null when alignment not provided!"); + assert(BB->getParent() && + "BB must be in a Function when alignment not provided!"); const DataLayout &DL = BB->getModule()->getDataLayout(); return DL.getABITypeAlign(Ty); } static Align computeLoadStoreDefaultAlign(Type *Ty, Instruction *I) { + assert(I && "Insertion position cannot be null when alignment not provided!"); return computeLoadStoreDefaultAlign(Ty, I->getParent()); } diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 4189aea46294c..74869fa62c66f 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -1475,6 +1475,74 @@ void 
FPPassManager::dumpPassStructure(unsigned Offset) { } } +#ifdef EXPENSIVE_CHECKS +namespace { +namespace details { + +// Basic hashing mechanism to detect structural change to the IR, used to verify +// pass return status consistency with actual change. Loosely copied from +// llvm/lib/Transforms/Utils/FunctionComparator.cpp + +class StructuralHash { + uint64_t Hash = 0x6acaa36bef8325c5ULL; + + void update(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + +public: + StructuralHash() = default; + + void update(Function &F) { + if (F.empty()) + return; + + update(F.isVarArg()); + update(F.arg_size()); + + SmallVector BBs; + SmallPtrSet VisitedBBs; + + BBs.push_back(&F.getEntryBlock()); + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); + update(45798); // Block header + for (auto &Inst : *BB) + update(Inst.getOpcode()); + + const Instruction *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(Term->getSuccessor(i)).second) + continue; + BBs.push_back(Term->getSuccessor(i)); + } + } + } + + void update(Module &M) { + for (Function &F : M) + update(F); + } + + uint64_t getHash() const { return Hash; } +}; + +} // namespace details + +uint64_t StructuralHash(Function &F) { + details::StructuralHash H; + H.update(F); + return H.getHash(); +} + +uint64_t StructuralHash(Module &M) { + details::StructuralHash H; + H.update(M); + return H.getHash(); +} + +} // end anonymous namespace + +#endif /// Execute all of the passes scheduled for execution by invoking /// runOnFunction method. 
Keep track of whether any of the passes modifies @@ -1513,7 +1581,16 @@ bool FPPassManager::runOnFunction(Function &F) { { PassManagerPrettyStackEntry X(FP, F); TimeRegion PassTimer(getPassTimer(FP)); +#ifdef EXPENSIVE_CHECKS + uint64_t RefHash = StructuralHash(F); +#endif LocalChanged |= FP->runOnFunction(F); + +#ifdef EXPENSIVE_CHECKS + assert((LocalChanged || (RefHash == StructuralHash(F))) && + "Pass modifies its input and doesn't report it."); +#endif + if (EmitICRemark) { unsigned NewSize = F.getInstructionCount(); @@ -1614,7 +1691,17 @@ MPPassManager::runOnModule(Module &M) { PassManagerPrettyStackEntry X(MP, M); TimeRegion PassTimer(getPassTimer(MP)); +#ifdef EXPENSIVE_CHECKS + uint64_t RefHash = StructuralHash(M); +#endif + LocalChanged |= MP->runOnModule(M); + +#ifdef EXPENSIVE_CHECKS + assert((LocalChanged || (RefHash == StructuralHash(M))) && + "Pass modifies its input and doesn't report it."); +#endif + if (EmitICRemark) { // Update the size of the module. unsigned ModuleCount = M.getInstructionCount(); diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp index 0d66e321c396b..218c12ba433f8 100644 --- a/llvm/lib/IR/Mangler.cpp +++ b/llvm/lib/IR/Mangler.cpp @@ -100,7 +100,7 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F, for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { // 'Dereference' type in case of byval or inalloca parameter attribute. - uint64_t AllocSize = AI->hasPassPointeeByValueAttr() ? + uint64_t AllocSize = AI->hasPassPointeeByValueCopyAttr() ? 
AI->getPassPointeeByValueCopySize(DL) : DL.getTypeAllocSize(AI->getType()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8fa87b7489013..c518ae87ea9b0 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5006,36 +5006,73 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::matrix_transpose: case Intrinsic::matrix_column_major_load: case Intrinsic::matrix_column_major_store: { + Function *IF = Call.getCalledFunction(); + ConstantInt *Stride = nullptr; ConstantInt *NumRows; ConstantInt *NumColumns; - VectorType *TypeToCheck; + VectorType *ResultTy; + Type *Op0ElemTy = nullptr; + Type *Op1ElemTy = nullptr; switch (ID) { case Intrinsic::matrix_multiply: NumRows = cast(Call.getArgOperand(2)); NumColumns = cast(Call.getArgOperand(4)); - TypeToCheck = cast(Call.getType()); + ResultTy = cast(Call.getType()); + Op0ElemTy = + cast(Call.getArgOperand(0)->getType())->getElementType(); + Op1ElemTy = + cast(Call.getArgOperand(1)->getType())->getElementType(); break; case Intrinsic::matrix_transpose: NumRows = cast(Call.getArgOperand(1)); NumColumns = cast(Call.getArgOperand(2)); - TypeToCheck = cast(Call.getType()); + ResultTy = cast(Call.getType()); + Op0ElemTy = + cast(Call.getArgOperand(0)->getType())->getElementType(); break; case Intrinsic::matrix_column_major_load: + Stride = dyn_cast(Call.getArgOperand(1)); NumRows = cast(Call.getArgOperand(3)); NumColumns = cast(Call.getArgOperand(4)); - TypeToCheck = cast(Call.getType()); + ResultTy = cast(Call.getType()); + Op0ElemTy = + cast(Call.getArgOperand(0)->getType())->getElementType(); break; case Intrinsic::matrix_column_major_store: + Stride = dyn_cast(Call.getArgOperand(2)); NumRows = cast(Call.getArgOperand(4)); NumColumns = cast(Call.getArgOperand(5)); - TypeToCheck = cast(Call.getArgOperand(0)->getType()); + ResultTy = cast(Call.getArgOperand(0)->getType()); + Op0ElemTy = + cast(Call.getArgOperand(0)->getType())->getElementType(); + 
Op1ElemTy = + cast(Call.getArgOperand(1)->getType())->getElementType(); break; default: llvm_unreachable("unexpected intrinsic"); } - Assert(TypeToCheck->getNumElements() == + + Assert(ResultTy->getElementType()->isIntegerTy() || + ResultTy->getElementType()->isFloatingPointTy(), + "Result type must be an integer or floating-point type!", IF); + + Assert(ResultTy->getElementType() == Op0ElemTy, + "Vector element type mismatch of the result and first operand " + "vector!", IF); + + if (Op1ElemTy) + Assert(ResultTy->getElementType() == Op1ElemTy, + "Vector element type mismatch of the result and second operand " + "vector!", IF); + + Assert(ResultTy->getNumElements() == NumRows->getZExtValue() * NumColumns->getZExtValue(), - "result of a matrix operation does not fit in the returned vector"); + "Result of a matrix operation does not fit in the returned vector!"); + + if (Stride) + Assert(Stride->getZExtValue() >= NumRows->getZExtValue(), + "Stride must be greater or equal than the number of rows!", IF); + break; } }; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 0747ab2372abe..6a8572e57922c 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -349,9 +349,9 @@ class MCAsmStreamer final : public MCStreamer { void emitBundleLock(bool AlignToEnd) override; void emitBundleUnlock() override; - bool emitRelocDirective(const MCExpr &Offset, StringRef Name, - const MCExpr *Expr, SMLoc Loc, - const MCSubtargetInfo &STI) override; + Optional> + emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, + SMLoc Loc, const MCSubtargetInfo &STI) override; void emitAddrsig() override; void emitAddrsigSym(const MCSymbol *Sym) override; @@ -2072,9 +2072,10 @@ void MCAsmStreamer::emitBundleUnlock() { EmitEOL(); } -bool MCAsmStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name, - const MCExpr *Expr, SMLoc, - const MCSubtargetInfo &STI) { +Optional> +MCAsmStreamer::emitRelocDirective(const 
MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc, + const MCSubtargetInfo &STI) { OS << "\t.reloc "; Offset.print(OS, MAI); OS << ", " << Name; @@ -2083,7 +2084,7 @@ bool MCAsmStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name, Expr->print(OS, MAI); } EmitEOL(); - return false; + return None; } void MCAsmStreamer::emitAddrsig() { diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 9c564c83b6b5d..e39c4a03bc1ef 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SourceMgr.h" using namespace llvm; @@ -664,12 +665,13 @@ void MCObjectStreamer::emitGPRel64Value(const MCExpr *Value) { DF->getContents().resize(DF->getContents().size() + 8, 0); } -bool MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name, - const MCExpr *Expr, SMLoc Loc, - const MCSubtargetInfo &STI) { +Optional> +MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc, + const MCSubtargetInfo &STI) { Optional MaybeKind = Assembler->getBackend().getFixupKind(Name); if (!MaybeKind.hasValue()) - return true; + return std::make_pair(true, std::string("unknown relocation name")); MCFixupKind Kind = *MaybeKind; @@ -680,27 +682,33 @@ bool MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name, MCDataFragment *DF = getOrCreateDataFragment(&STI); flushPendingLabels(DF, DF->getContents().size()); - int64_t OffsetValue; - if (Offset.evaluateAsAbsolute(OffsetValue)) { - if (OffsetValue < 0) - llvm_unreachable(".reloc offset is negative"); - DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc)); - return false; + MCValue OffsetVal; + if (!Offset.evaluateAsRelocatable(OffsetVal, nullptr, nullptr)) + return 
std::make_pair(false, + std::string(".reloc offset is not relocatable")); + if (OffsetVal.isAbsolute()) { + if (OffsetVal.getConstant() < 0) + return std::make_pair(false, std::string(".reloc offset is negative")); + DF->getFixups().push_back( + MCFixup::create(OffsetVal.getConstant(), Expr, Kind, Loc)); + return None; } + if (OffsetVal.getSymB()) + return std::make_pair(false, + std::string(".reloc offset is not representable")); - if (Offset.getKind() != llvm::MCExpr::SymbolRef) - llvm_unreachable(".reloc offset is not absolute nor a label"); - - const MCSymbolRefExpr &SRE = cast(Offset); + const MCSymbolRefExpr &SRE = cast(*OffsetVal.getSymA()); if (SRE.getSymbol().isDefined()) { - DF->getFixups().push_back(MCFixup::create(SRE.getSymbol().getOffset(), - Expr, Kind, Loc)); - return false; + // FIXME SRE.getSymbol() may not be relative to DF. + DF->getFixups().push_back( + MCFixup::create(SRE.getSymbol().getOffset() + OffsetVal.getConstant(), + Expr, Kind, Loc)); + return None; } PendingFixups.emplace_back(&SRE.getSymbol(), DF, - MCFixup::create(-1, Expr, Kind, Loc)); - return false; + MCFixup::create(-1, Expr, Kind, Loc)); + return None; } void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 9e92ce09986f6..c05f26cbdda5c 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3011,20 +3011,12 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) { const MCExpr *Offset; const MCExpr *Expr = nullptr; - int64_t OffsetValue; SMLoc OffsetLoc = Lexer.getTok().getLoc(); if (parseExpression(Offset)) return true; - - if ((Offset->evaluateAsAbsolute(OffsetValue, - getStreamer().getAssemblerPtr()) && - check(OffsetValue < 0, OffsetLoc, "expression is negative")) || - (check(Offset->getKind() != llvm::MCExpr::Constant && - Offset->getKind() != 
llvm::MCExpr::SymbolRef, - OffsetLoc, "expected non-negative number or a label")) || - (parseToken(AsmToken::Comma, "expected comma") || - check(getTok().isNot(AsmToken::Identifier), "expected relocation name"))) + if (parseToken(AsmToken::Comma, "expected comma") || + check(getTok().isNot(AsmToken::Identifier), "expected relocation name")) return true; SMLoc NameLoc = Lexer.getTok().getLoc(); @@ -3048,8 +3040,10 @@ bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) { const MCTargetAsmParser &MCT = getTargetParser(); const MCSubtargetInfo &STI = MCT.getSTI(); - if (getStreamer().emitRelocDirective(*Offset, Name, Expr, DirectiveLoc, STI)) - return Error(NameLoc, "unknown relocation name"); + if (Optional> Err = + getStreamer().emitRelocDirective(*Offset, Name, Expr, DirectiveLoc, + STI)) + return Error(Err->first ? NameLoc : OffsetLoc, Err->second); return false; } diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 3dbd00aae47a2..fb3bb11d1f430 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -490,8 +490,10 @@ class MasmParser : public MCAsmParser { bool isParsingMasm() const override { return true; } - bool LookUpFieldOffset(StringRef Base, StringRef Member, - unsigned &Offset) override; + bool lookUpField(StringRef Name, StringRef &Type, + unsigned &Offset) const override; + bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, + unsigned &Offset) const override; bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, @@ -561,8 +563,8 @@ class MasmParser : public MCAsmParser { } static void DiagHandler(const SMDiagnostic &Diag, void *Context); - bool LookUpFieldOffset(const StructInfo &Structure, StringRef Member, - unsigned &Offset); + bool lookUpField(const StructInfo &Structure, StringRef Member, + StringRef &Type, unsigned &Offset) const; /// Should we emit DWARF describing this assembler source? 
(Returns false if /// the source has .file directives, which means we don't want to generate @@ -792,8 +794,6 @@ class MasmParser : public MCAsmParser { bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents); bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents); - bool emitStructValue(const StructInfo &Structure); - bool emitFieldInitializer(const FieldInfo &Field, const FieldInitializer &Initializer); bool emitFieldInitializer(const FieldInfo &Field, @@ -810,9 +810,6 @@ class MasmParser : public MCAsmParser { const StructInitializer &Initializer); // User-defined types (structs, unions): - bool emitStructValue(const StructInfo &Structure, - const StructInitializer &Initializer, - size_t InitialOffset = 0, size_t InitialField = 0); bool emitStructValues(const StructInfo &Structure); bool addStructField(StringRef Name, const StructInfo &Structure); bool parseDirectiveStructValue(const StructInfo &Structure, @@ -1397,12 +1394,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } // Find the field offset if used. + StringRef Type; unsigned Offset = 0; Split = SymbolName.split('.'); if (!Split.second.empty()) { SymbolName = Split.first; if (Structs.count(SymbolName.lower()) && - !LookUpFieldOffset(SymbolName, Split.second, Offset)) { + !lookUpField(SymbolName, Split.second, Type, Offset)) { // This is actually a reference to a field offset. 
Res = MCConstantExpr::create(Offset, getContext()); return false; @@ -1410,10 +1408,10 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { auto TypeIt = KnownType.find(SymbolName); if (TypeIt == KnownType.end() || - LookUpFieldOffset(*TypeIt->second, Split.second, Offset)) { + lookUpField(*TypeIt->second, Split.second, Type, Offset)) { std::pair BaseMember = Split.second.split('.'); StringRef Base = BaseMember.first, Member = BaseMember.second; - LookUpFieldOffset(Base, Member, Offset); + lookUpField(Base, Member, Type, Offset); } } @@ -3830,20 +3828,6 @@ bool MasmParser::emitFieldValue(const FieldInfo &Field) { llvm_unreachable("Unhandled FieldType enum"); } -bool MasmParser::emitStructValue(const StructInfo &Structure) { - size_t Offset = 0; - for (const auto &Field : Structure.Fields) { - getStreamer().emitZeros(Field.Offset - Offset); - if (emitFieldValue(Field)) - return true; - Offset = Field.Offset + Field.SizeOf; - } - // Add final padding. - if (Offset != Structure.Size) - getStreamer().emitZeros(Structure.Size - Offset); - return false; -} - bool MasmParser::emitFieldInitializer(const FieldInfo &Field, const IntFieldInfo &Contents, const IntFieldInfo &Initializer) { @@ -4081,11 +4065,8 @@ bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) { return Error(NameLoc, "mismatched name in ENDS directive; expected '" + StructInProgress.back().Name + "'"); StructInfo Structure = StructInProgress.pop_back_val(); - if (Structure.Size % Structure.Alignment != 0) { - // Pad to make the structure's size divisible by its alignment. - Structure.Size += - Structure.Alignment - (Structure.Size % Structure.Alignment); - } + // Pad to make the structure's size divisible by its alignment. 
+ Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment); Structs[Name.lower()] = Structure; if (parseToken(AsmToken::EndOfStatement)) @@ -4104,29 +4085,49 @@ bool MasmParser::parseDirectiveNestedEnds() { return addErrorSuffix(" in nested ENDS directive"); StructInfo Structure = StructInProgress.pop_back_val(); - if (Structure.Size % Structure.Alignment != 0) { - // Pad to make the structure's size divisible by its alignment. - Structure.Size += - Structure.Alignment - (Structure.Size % Structure.Alignment); - } - StructInfo &ParentStruct = StructInProgress.back(); + // Pad to make the structure's size divisible by its alignment. + Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment); - FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); - StructFieldInfo &StructInfo = Field.Contents.StructInfo; - Field.Type = Structure.Size; - Field.LengthOf = 1; - Field.SizeOf = Structure.Size; + StructInfo &ParentStruct = StructInProgress.back(); + if (Structure.Name.empty()) { + const size_t OldFields = ParentStruct.Fields.size(); + ParentStruct.Fields.insert( + ParentStruct.Fields.end(), + std::make_move_iterator(Structure.Fields.begin()), + std::make_move_iterator(Structure.Fields.end())); + for (const auto &FieldByName : Structure.FieldsByName) { + ParentStruct.FieldsByName[FieldByName.getKey()] = + FieldByName.getValue() + OldFields; + } + if (!ParentStruct.IsUnion) { + for (auto FieldIter = ParentStruct.Fields.begin() + OldFields; + FieldIter != ParentStruct.Fields.end(); ++FieldIter) { + FieldIter->Offset += ParentStruct.Size; + } + } - if (ParentStruct.IsUnion) - ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf); - else - ParentStruct.Size += Field.SizeOf; + if (ParentStruct.IsUnion) + ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); + else + ParentStruct.Size += Structure.Size; + } else { + FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); + StructFieldInfo &StructInfo = 
Field.Contents.StructInfo; + Field.Type = Structure.Size; + Field.LengthOf = 1; + Field.SizeOf = Structure.Size; + + if (ParentStruct.IsUnion) + ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf); + else + ParentStruct.Size += Field.SizeOf; - StructInfo.Structure = Structure; - StructInfo.Initializers.emplace_back(); - auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers; - for (const auto &SubField : Structure.Fields) { - FieldInitializers.push_back(SubField.Contents); + StructInfo.Structure = Structure; + StructInfo.Initializers.emplace_back(); + auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers; + for (const auto &SubField : Structure.Fields) { + FieldInitializers.push_back(SubField.Contents); + } } return false; @@ -6519,34 +6520,56 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA, llvm_unreachable("Unstable rewrite sort."); } -bool MasmParser::LookUpFieldOffset(StringRef Base, StringRef Member, - unsigned &Offset) { +bool MasmParser::lookUpField(StringRef Name, StringRef &Type, + unsigned &Offset) const { + const std::pair BaseMember = Name.split('.'); + const StringRef Base = BaseMember.first, Member = BaseMember.second; + return lookUpField(Base, Member, Type, Offset); +} + +bool MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type, + unsigned &Offset) const { if (Base.empty()) return true; + unsigned BaseOffset = 0; + if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset)) + Base = Type; + auto TypeIt = KnownType.find(Base); if (TypeIt != KnownType.end()) - return LookUpFieldOffset(*TypeIt->second, Member, Offset); + return lookUpField(*TypeIt->second, Member, Type, Offset); auto StructIt = Structs.find(Base.lower()); if (StructIt != Structs.end()) - return LookUpFieldOffset(StructIt->second, Member, Offset); + return lookUpField(StructIt->second, Member, Type, Offset); return true; } -bool MasmParser::LookUpFieldOffset(const StructInfo &Structure, - StringRef 
Member, unsigned &Offset) { +bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, + StringRef &Type, unsigned &Offset) const { + if (Member.empty()) { + Type = Structure.Name; + return false; + } + std::pair Split = Member.split('.'); const StringRef FieldName = Split.first, FieldMember = Split.second; + auto StructIt = Structs.find(FieldName.lower()); + if (StructIt != Structs.end()) + return lookUpField(StructIt->second, FieldMember, Type, Offset); + auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); if (FieldIt == Structure.FieldsByName.end()) return true; const FieldInfo &Field = Structure.Fields[FieldIt->second]; if (FieldMember.empty()) { - Offset = Field.Offset; + Offset += Field.Offset; + if (Field.Contents.FT == FT_STRUCT) + Type = Field.Contents.StructInfo.Structure.Name; return false; } @@ -6554,7 +6577,7 @@ bool MasmParser::LookUpFieldOffset(const StructInfo &Structure, return true; const StructFieldInfo &StructInfo = Field.Contents.StructInfo; - bool Result = LookUpFieldOffset(StructInfo.Structure, FieldMember, Offset); + bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset); if (Result) return true; diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index f51d908c53e13..af4620361c34d 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -556,7 +556,9 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry, switch (RelEntry.Type) { case wasm::R_WASM_TABLE_INDEX_REL_SLEB: case wasm::R_WASM_TABLE_INDEX_SLEB: - case wasm::R_WASM_TABLE_INDEX_I32: { + case wasm::R_WASM_TABLE_INDEX_SLEB64: + case wasm::R_WASM_TABLE_INDEX_I32: + case wasm::R_WASM_TABLE_INDEX_I64: { // Provisional value is table address of the resolved symbol itself const MCSymbolWasm *Base = cast(Layout.getBaseSymbol(*RelEntry.Symbol)); @@ -688,6 +690,7 @@ void WasmObjectWriter::applyRelocations( case wasm::R_WASM_GLOBAL_INDEX_I32: patchI32(Stream, 
Value, Offset); break; + case wasm::R_WASM_TABLE_INDEX_I64: case wasm::R_WASM_MEMORY_ADDR_I64: patchI64(Stream, Value, Offset); break; @@ -697,6 +700,7 @@ void WasmObjectWriter::applyRelocations( case wasm::R_WASM_MEMORY_ADDR_REL_SLEB: writePatchableSLEB<5>(Stream, Value, Offset); break; + case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64: writePatchableSLEB<10>(Stream, Value, Offset); @@ -1599,7 +1603,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, // purely to make the object file's provisional values readable, and is // ignored by the linker, which re-calculates the relocations itself. if (Rel.Type != wasm::R_WASM_TABLE_INDEX_I32 && + Rel.Type != wasm::R_WASM_TABLE_INDEX_I64 && Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB && + Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB64 && Rel.Type != wasm::R_WASM_TABLE_INDEX_REL_SLEB) return; assert(Rel.Symbol->isFunction()); diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 94a8d56c55fce..4796ef531054b 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -1116,9 +1116,9 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm, for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); uint32_t ToIndex = CGPE.To->getSymbol().getIndex(); - OS.write((const char *)&FromIndex, sizeof(uint32_t)); - OS.write((const char *)&ToIndex, sizeof(uint32_t)); - OS.write((const char *)&CGPE.Count, sizeof(uint64_t)); + support::endian::write(OS, FromIndex, W.Endian); + support::endian::write(OS, ToIndex, W.Endian); + support::endian::write(OS, CGPE.Count, W.Endian); } } diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 3f3f79b0f4ff7..83170bbc4e6d9 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -62,6 +62,8 @@ static bool 
supportsAArch64(uint64_t Type) { switch (Type) { case ELF::R_AARCH64_ABS32: case ELF::R_AARCH64_ABS64: + case ELF::R_AARCH64_PREL32: + case ELF::R_AARCH64_PREL64: return true; default: return false; @@ -74,6 +76,10 @@ static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) { return (S + getELFAddend(R)) & 0xFFFFFFFF; case ELF::R_AARCH64_ABS64: return S + getELFAddend(R); + case ELF::R_AARCH64_PREL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + case ELF::R_AARCH64_PREL64: + return S + getELFAddend(R) - R.getOffset(); default: llvm_unreachable("Invalid relocation type"); } @@ -152,6 +158,8 @@ static bool supportsPPC64(uint64_t Type) { switch (Type) { case ELF::R_PPC64_ADDR32: case ELF::R_PPC64_ADDR64: + case ELF::R_PPC64_REL32: + case ELF::R_PPC64_REL64: return true; default: return false; @@ -164,6 +172,10 @@ static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) { return (S + getELFAddend(R)) & 0xFFFFFFFF; case ELF::R_PPC64_ADDR64: return S + getELFAddend(R); + case ELF::R_PPC64_REL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + case ELF::R_PPC64_REL64: + return S + getELFAddend(R) - R.getOffset(); default: llvm_unreachable("Invalid relocation type"); } @@ -259,22 +271,42 @@ static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) { } static bool supportsPPC32(uint64_t Type) { - return Type == ELF::R_PPC_ADDR32; + switch (Type) { + case ELF::R_PPC_ADDR32: + case ELF::R_PPC_REL32: + return true; + default: + return false; + } } static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) { - if (R.getType() == ELF::R_PPC_ADDR32) + switch (R.getType()) { + case ELF::R_PPC_ADDR32: return (S + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_PPC_REL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + } llvm_unreachable("Invalid relocation type"); } static bool supportsARM(uint64_t Type) { - return Type == ELF::R_ARM_ABS32; + switch (Type) { + case ELF::R_ARM_ABS32: + case 
ELF::R_ARM_REL32: + return true; + default: + return false; + } } static uint64_t resolveARM(RelocationRef R, uint64_t S, uint64_t A) { - if (R.getType() == ELF::R_ARM_ABS32) + switch (R.getType()) { + case ELF::R_ARM_ABS32: return (S + A) & 0xFFFFFFFF; + case ELF::R_ARM_REL32: + return (S + A - R.getOffset()) & 0xFFFFFFFF; + } llvm_unreachable("Invalid relocation type"); } @@ -531,6 +563,8 @@ static bool supportsWasm64(uint64_t Type) { case wasm::R_WASM_MEMORY_ADDR_LEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_I64: + case wasm::R_WASM_TABLE_INDEX_SLEB64: + case wasm::R_WASM_TABLE_INDEX_I64: return true; default: return supportsWasm32(Type); @@ -563,6 +597,8 @@ static uint64_t resolveWasm64(RelocationRef R, uint64_t S, uint64_t A) { case wasm::R_WASM_MEMORY_ADDR_LEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_I64: + case wasm::R_WASM_TABLE_INDEX_SLEB64: + case wasm::R_WASM_TABLE_INDEX_I64: // For wasm section, its offset at 0 -- ignoring Value return A; default: diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index bb2e81d64047f..23418a358fa40 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -791,7 +791,9 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { switch (Reloc.Type) { case wasm::R_WASM_FUNCTION_INDEX_LEB: case wasm::R_WASM_TABLE_INDEX_SLEB: + case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_I32: + case wasm::R_WASM_TABLE_INDEX_I64: case wasm::R_WASM_TABLE_INDEX_REL_SLEB: if (!isValidFunctionSymbol(Reloc.Index)) return make_error("Bad relocation function index", @@ -871,7 +873,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32 || Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32) Size = 4; - if (Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64) + if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I64 || + Reloc.Type == 
wasm::R_WASM_MEMORY_ADDR_I64) Size = 8; if (Reloc.Offset + Size > EndOffset) return make_error("Bad relocation offset", @@ -957,6 +960,8 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) { break; case wasm::WASM_EXTERNAL_MEMORY: Im.Memory = readLimits(Ctx); + if (Im.Memory.Flags & wasm::WASM_LIMITS_FLAG_IS_64) + HasMemory64 = true; break; case wasm::WASM_EXTERNAL_TABLE: Im.Table = readTable(Ctx); @@ -1019,7 +1024,10 @@ Error WasmObjectFile::parseMemorySection(ReadContext &Ctx) { uint32_t Count = readVaruint32(Ctx); Memories.reserve(Count); while (Count--) { - Memories.push_back(readLimits(Ctx)); + auto Limits = readLimits(Ctx); + if (Limits.Flags & wasm::WASM_LIMITS_FLAG_IS_64) + HasMemory64 = true; + Memories.push_back(Limits); } if (Ctx.Ptr != Ctx.End) return make_error("Memory section ended prematurely", @@ -1576,11 +1584,15 @@ section_iterator WasmObjectFile::section_end() const { return section_iterator(SectionRef(Ref, this)); } -uint8_t WasmObjectFile::getBytesInAddress() const { return 4; } +uint8_t WasmObjectFile::getBytesInAddress() const { + return HasMemory64 ? 8 : 4; +} StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; } -Triple::ArchType WasmObjectFile::getArch() const { return Triple::wasm32; } +Triple::ArchType WasmObjectFile::getArch() const { + return HasMemory64 ? Triple::wasm64 : Triple::wasm32; +} SubtargetFeatures WasmObjectFile::getFeatures() const { return SubtargetFeatures(); diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index a8b467af7b2d5..dc815cd69c129 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -168,7 +168,7 @@ Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) { if (DebugRanges.AddrSize) AddrSize = *DebugRanges.AddrSize; else - AddrSize = DI.Is64bit ? 8 : 4; + AddrSize = DI.Is64BitAddrSize ? 
8 : 4; for (auto Entry : DebugRanges.Entries) { if (Error Err = writeVariableSizedInteger(Entry.LowOffset, AddrSize, OS, DI.IsLittleEndian)) @@ -381,7 +381,7 @@ Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) { if (TableEntry.AddrSize) AddrSize = *TableEntry.AddrSize; else - AddrSize = DI.Is64bit ? 8 : 4; + AddrSize = DI.Is64BitAddrSize ? 8 : 4; uint64_t Length; if (TableEntry.Length) @@ -416,6 +416,31 @@ Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) { return Error::success(); } +Error DWARFYAML::emitDebugStrOffsets(raw_ostream &OS, const Data &DI) { + assert(DI.DebugStrOffsets && "unexpected emitDebugStrOffsets() call"); + for (const DWARFYAML::StringOffsetsTable &Table : *DI.DebugStrOffsets) { + uint64_t Length; + if (Table.Length) + Length = *Table.Length; + else + // sizeof(version) + sizeof(padding) = 4 + Length = + 4 + Table.Offsets.size() * (Table.Format == dwarf::DWARF64 ? 8 : 4); + + writeInitialLength(Table.Format, Length, OS, DI.IsLittleEndian); + writeInteger((uint16_t)Table.Version, OS, DI.IsLittleEndian); + writeInteger((uint16_t)Table.Padding, OS, DI.IsLittleEndian); + + for (uint64_t Offset : Table.Offsets) { + cantFail(writeVariableSizedInteger(Offset, + Table.Format == dwarf::DWARF64 ? 
8 : 4, + OS, DI.IsLittleEndian)); + } + } + + return Error::success(); +} + using EmitFuncType = Error (*)(raw_ostream &, const DWARFYAML::Data &); static Error diff --git a/llvm/lib/ObjectYAML/DWARFVisitor.cpp b/llvm/lib/ObjectYAML/DWARFVisitor.cpp index f478a1b84397f..bea71fb3235d2 100644 --- a/llvm/lib/ObjectYAML/DWARFVisitor.cpp +++ b/llvm/lib/ObjectYAML/DWARFVisitor.cpp @@ -49,8 +49,6 @@ static unsigned getRefSize(const DWARFYAML::Unit &Unit) { template Error DWARFYAML::VisitorImpl::traverseDebugInfo() { for (auto &Unit : DebugInfo.CompileUnits) { onStartCompileUnit(Unit); - if (Unit.Entries.empty()) - continue; for (auto &Entry : Unit.Entries) { onStartDIE(Unit, Entry); @@ -116,6 +114,16 @@ template Error DWARFYAML::VisitorImpl::traverseDebugInfo() { "")); break; } + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_addrx: + case dwarf::DW_FORM_rnglistx: + case dwarf::DW_FORM_loclistx: + case dwarf::DW_FORM_udata: + case dwarf::DW_FORM_ref_udata: + case dwarf::DW_FORM_GNU_addr_index: + case dwarf::DW_FORM_GNU_str_index: + onValue((uint64_t)FormVal->Value, /*LEB=*/true); + break; case dwarf::DW_FORM_data1: case dwarf::DW_FORM_ref1: case dwarf::DW_FORM_flag: @@ -139,15 +147,12 @@ template Error DWARFYAML::VisitorImpl::traverseDebugInfo() { case dwarf::DW_FORM_data8: case dwarf::DW_FORM_ref8: case dwarf::DW_FORM_ref_sup8: + case dwarf::DW_FORM_ref_sig8: onValue((uint64_t)FormVal->Value); break; case dwarf::DW_FORM_sdata: onValue((int64_t)FormVal->Value, true); break; - case dwarf::DW_FORM_udata: - case dwarf::DW_FORM_ref_udata: - onValue((uint64_t)FormVal->Value, true); - break; case dwarf::DW_FORM_string: onValue(FormVal->CStr); break; @@ -165,13 +170,6 @@ template Error DWARFYAML::VisitorImpl::traverseDebugInfo() { case dwarf::DW_FORM_strp_sup: onVariableSizeValue(FormVal->Value, getOffsetSize(Unit)); break; - case dwarf::DW_FORM_ref_sig8: - onValue((uint64_t)FormVal->Value); - break; - case dwarf::DW_FORM_GNU_addr_index: - case dwarf::DW_FORM_GNU_str_index: - 
onValue((uint64_t)FormVal->Value, true); - break; default: break; } diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index bedf31dc8179f..4ed3b48b67852 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -46,6 +46,8 @@ SetVector DWARFYAML::Data::getUsedSectionNames() const { SecNames.insert("debug_gnu_pubnames"); if (GNUPubTypes) SecNames.insert("debug_gnu_pubtypes"); + if (DebugStrOffsets) + SecNames.insert("debug_str_offsets"); return SecNames; } @@ -69,6 +71,7 @@ void MappingTraits::mapping(IO &IO, DWARFYAML::Data &DWARF) { IO.mapOptional("debug_info", DWARF.CompileUnits); IO.mapOptional("debug_line", DWARF.DebugLines); IO.mapOptional("debug_addr", DWARF.DebugAddr); + IO.mapOptional("debug_str_offsets", DWARF.DebugStrOffsets); IO.setContext(OldContext); } @@ -221,6 +224,15 @@ void MappingTraits::mapping( IO.mapOptional("Entries", AddrTable.SegAddrPairs); } +void MappingTraits::mapping( + IO &IO, DWARFYAML::StringOffsetsTable &StrOffsetsTable) { + IO.mapOptional("Format", StrOffsetsTable.Format, dwarf::DWARF32); + IO.mapOptional("Length", StrOffsetsTable.Length); + IO.mapOptional("Version", StrOffsetsTable.Version, 5); + IO.mapOptional("Padding", StrOffsetsTable.Padding, 0); + IO.mapOptional("Offsets", StrOffsetsTable.Offsets); +} + void MappingTraits::mapping( IO &IO, DWARFYAML::InitialLength &InitialLength) { IO.mapRequired("TotalLength", InitialLength.TotalLength); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 218e7df8e39a5..26fe1236752fa 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -393,27 +393,47 @@ void ELFState::writeELFHeader(raw_ostream &OS, uint64_t SHOff) { Header.e_machine = Doc.Header.Machine; Header.e_version = EV_CURRENT; Header.e_entry = Doc.Header.Entry; - Header.e_phoff = Doc.ProgramHeaders.size() ? 
sizeof(Header) : 0; Header.e_flags = Doc.Header.Flags; Header.e_ehsize = sizeof(Elf_Ehdr); - Header.e_phentsize = Doc.ProgramHeaders.size() ? sizeof(Elf_Phdr) : 0; - Header.e_phnum = Doc.ProgramHeaders.size(); - Header.e_shentsize = - Doc.Header.SHEntSize ? (uint16_t)*Doc.Header.SHEntSize : sizeof(Elf_Shdr); + if (Doc.Header.EPhOff) + Header.e_phoff = *Doc.Header.EPhOff; + else if (!Doc.ProgramHeaders.empty()) + Header.e_phoff = sizeof(Header); + else + Header.e_phoff = 0; + + if (Doc.Header.EPhEntSize) + Header.e_phentsize = *Doc.Header.EPhEntSize; + else if (!Doc.ProgramHeaders.empty()) + Header.e_phentsize = sizeof(Elf_Phdr); + else + Header.e_phentsize = 0; + + if (Doc.Header.EPhNum) + Header.e_phnum = *Doc.Header.EPhNum; + else if (!Doc.ProgramHeaders.empty()) + Header.e_phnum = Doc.ProgramHeaders.size(); + else + Header.e_phnum = 0; + + Header.e_shentsize = Doc.Header.EShEntSize ? (uint16_t)*Doc.Header.EShEntSize + : sizeof(Elf_Shdr); - const bool NoShdrs = Doc.SectionHeaders && Doc.SectionHeaders->NoHeaders; + const bool NoShdrs = + Doc.SectionHeaders && Doc.SectionHeaders->NoHeaders.getValueOr(false); - if (Doc.Header.SHOff) - Header.e_shoff = *Doc.Header.SHOff; + if (Doc.Header.EShOff) + Header.e_shoff = *Doc.Header.EShOff; else if (NoShdrs) Header.e_shoff = 0; else Header.e_shoff = SHOff; - if (Doc.Header.SHNum) - Header.e_shnum = *Doc.Header.SHNum; - else if (!Doc.SectionHeaders) + if (Doc.Header.EShNum) + Header.e_shnum = *Doc.Header.EShNum; + else if (!Doc.SectionHeaders || + (Doc.SectionHeaders->NoHeaders && !*Doc.SectionHeaders->NoHeaders)) Header.e_shnum = Doc.getSections().size(); else if (NoShdrs) Header.e_shnum = 0; @@ -423,8 +443,8 @@ void ELFState::writeELFHeader(raw_ostream &OS, uint64_t SHOff) { : 0) + /*Null section*/ 1; - if (Doc.Header.SHStrNdx) - Header.e_shstrndx = *Doc.Header.SHStrNdx; + if (Doc.Header.EShStrNdx) + Header.e_shstrndx = *Doc.Header.EShStrNdx; else if (NoShdrs || ExcludedSectionHeaders.count(".shstrtab")) Header.e_shstrndx 
= 0; else @@ -485,11 +505,12 @@ unsigned ELFState::toSectionIndex(StringRef S, StringRef LocSec, return 0; } - if (!Doc.SectionHeaders || - (!Doc.SectionHeaders->NoHeaders && !Doc.SectionHeaders->Excluded)) + if (!Doc.SectionHeaders || (Doc.SectionHeaders->NoHeaders && + !Doc.SectionHeaders->NoHeaders.getValue())) return Index; - assert(!Doc.SectionHeaders->NoHeaders || !Doc.SectionHeaders->Sections); + assert(!Doc.SectionHeaders->NoHeaders.getValueOr(false) || + !Doc.SectionHeaders->Sections); size_t FirstExcluded = Doc.SectionHeaders->Sections ? Doc.SectionHeaders->Sections->size() : 0; if (Index >= FirstExcluded) { @@ -953,6 +974,8 @@ Expected emitDWARF(typename ELFT::Shdr &SHeader, StringRef Name, else if (Name == ".debug_gnu_pubtypes") Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubTypes, DWARF.IsLittleEndian, /*IsGNUStyle=*/true); + else if (Name == ".debug_str_offsets") + Err = DWARFYAML::emitDebugStrOffsets(*OS, DWARF); else llvm_unreachable("unexpected emitDWARF() call"); @@ -1758,7 +1781,7 @@ template void ELFState::buildSectionIndex() { if (!ExcludedSectionHeaders.insert(Hdr.Name).second) llvm_unreachable("buildSectionIndex() failed"); - if (Doc.SectionHeaders->NoHeaders) + if (Doc.SectionHeaders->NoHeaders.getValueOr(false)) for (const ELFYAML::Section *S : Sections) if (!ExcludedSectionHeaders.insert(S->Name).second) llvm_unreachable("buildSectionIndex() failed"); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 51ca5dd726216..f85d6a5d30efa 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -823,6 +823,8 @@ void ScalarBitSetTraits::bitset( BCase(MIPS16); BCase(MICROMIPS); BCase(XPA); + BCase(CRC); + BCase(GINV); #undef BCase } @@ -842,7 +844,7 @@ void MappingTraits::mapping( IO &IO, ELFYAML::SectionHeaderTable &SectionHeader) { IO.mapOptional("Sections", SectionHeader.Sections); IO.mapOptional("Excluded", SectionHeader.Excluded); - IO.mapOptional("NoHeaders", 
SectionHeader.NoHeaders, false); + IO.mapOptional("NoHeaders", SectionHeader.NoHeaders); } StringRef MappingTraits::validate( @@ -866,10 +868,16 @@ void MappingTraits::mapping(IO &IO, IO.mapOptional("Flags", FileHdr.Flags, ELFYAML::ELF_EF(0)); IO.mapOptional("Entry", FileHdr.Entry, Hex64(0)); - IO.mapOptional("SHEntSize", FileHdr.SHEntSize); - IO.mapOptional("SHOff", FileHdr.SHOff); - IO.mapOptional("SHNum", FileHdr.SHNum); - IO.mapOptional("SHStrNdx", FileHdr.SHStrNdx); + // obj2yaml does not dump these fields. + assert(!IO.outputting() || + (!FileHdr.EPhOff && !FileHdr.EPhEntSize && !FileHdr.EPhNum)); + IO.mapOptional("EPhOff", FileHdr.EPhOff); + IO.mapOptional("EPhEntSize", FileHdr.EPhEntSize); + IO.mapOptional("EPhNum", FileHdr.EPhNum); + IO.mapOptional("EShEntSize", FileHdr.EShEntSize); + IO.mapOptional("EShOff", FileHdr.EShOff); + IO.mapOptional("EShNum", FileHdr.EShNum); + IO.mapOptional("EShStrNdx", FileHdr.EShStrNdx); } void MappingTraits::mapping( @@ -1681,7 +1689,7 @@ void MappingTraits::mapping(IO &IO, ELFYAML::Object &Object) { if (Object.DWARF) { Object.DWARF->IsLittleEndian = Object.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB); - Object.DWARF->Is64bit = + Object.DWARF->Is64BitAddrSize = Object.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64); } IO.setContext(nullptr); diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp index cd9ca76f5d2f0..86aad0233767e 100644 --- a/llvm/lib/ObjectYAML/MachOYAML.cpp +++ b/llvm/lib/ObjectYAML/MachOYAML.cpp @@ -107,8 +107,8 @@ void MappingTraits::mapping(IO &IO, Object.DWARF.IsLittleEndian = Object.IsLittleEndian; IO.mapRequired("FileHeader", Object.Header); - Object.DWARF.Is64bit = Object.Header.magic == MachO::MH_MAGIC_64 || - Object.Header.magic == MachO::MH_CIGAM_64; + Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 || + Object.Header.magic == MachO::MH_CIGAM_64; IO.mapOptional("LoadCommands", Object.LoadCommands); if(!Object.LinkEdit.isEmpty() 
|| !IO.outputting()) IO.mapOptional("LinkEditData", Object.LinkEdit); diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 926eb8e0437f6..16404d3d81078 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -330,6 +330,60 @@ bool OptTable::addValues(const char *Option, const char *Values) { return false; } +// Parse a single argument, return the new argument, and update Index. If +// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will +// be updated to "-bc". This overload does not support +// FlagsToInclude/FlagsToExclude or case insensitive options. +Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const { + // Anything that doesn't start with PrefixesUnion is an input, as is '-' + // itself. + const char *CStr = Args.getArgString(Index); + StringRef Str(CStr); + if (isInput(PrefixesUnion, Str)) + return new Arg(getOption(TheInputOptionID), Str, Index++, CStr); + + const Info *End = OptionInfos.data() + OptionInfos.size(); + StringRef Name = Str.ltrim(PrefixChars); + const Info *Start = std::lower_bound( + OptionInfos.data() + FirstSearchableIndex, End, Name.data()); + const Info *Fallback = nullptr; + unsigned Prev = Index; + + // Search for the option which matches Str. + for (; Start != End; ++Start) { + unsigned ArgSize = matchOption(Start, Str, IgnoreCase); + if (!ArgSize) + continue; + + Option Opt(Start, this); + if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize), + false, Index)) + return A; + + // If Opt is a Flag of length 2 (e.g. "-a"), we know it is a prefix of + // the current argument (e.g. "-abc"). Match it as a fallback if no longer + // option (e.g. "-ab") exists. + if (ArgSize == 2 && Opt.getKind() == Option::FlagClass) + Fallback = Start; + + // Otherwise, see if the argument is missing. 
+ if (Prev != Index) + return nullptr; + } + if (Fallback) { + Option Opt(Fallback, this); + if (Arg *A = Opt.accept(Args, Str.substr(0, 2), true, Index)) { + if (Str.size() == 2) + ++Index; + else + Args.replaceArgString(Index, Twine('-') + Str.substr(2)); + return A; + } + } + + return new Arg(getOption(TheUnknownOptionID), Str, Index++, CStr); +} + Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, unsigned FlagsToInclude, unsigned FlagsToExclude) const { @@ -373,7 +427,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, continue; // See if this option matches. - if (Arg *A = Opt.accept(Args, Index, ArgSize)) + if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize), + false, Index)) return A; // Otherwise, see if this argument was missing values. @@ -414,8 +469,11 @@ InputArgList OptTable::ParseArgs(ArrayRef ArgArr, } unsigned Prev = Index; - Arg *A = ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude); - assert(Index > Prev && "Parser failed to consume argument."); + Arg *A = GroupedShortOptions + ? parseOneArgGrouped(Args, Index) + : ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude); + assert((Index > Prev || GroupedShortOptions) && + "Parser failed to consume argument."); // Check for missing argument error. 
if (!A) { diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp index 9abc9fdce4c72..68d074b2702e2 100644 --- a/llvm/lib/Option/Option.cpp +++ b/llvm/lib/Option/Option.cpp @@ -106,9 +106,9 @@ bool Option::matches(OptSpecifier Opt) const { return false; } -Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index, - unsigned ArgSize) const { - StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize); +Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling, + unsigned &Index) const { + size_t ArgSize = Spelling.size(); switch (getKind()) { case FlagClass: { if (ArgSize != strlen(Args.getArgString(Index))) @@ -230,10 +230,11 @@ Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index, } } -Arg *Option::accept(const ArgList &Args, - unsigned &Index, - unsigned ArgSize) const { - std::unique_ptr A(acceptInternal(Args, Index, ArgSize)); +Arg *Option::accept(const ArgList &Args, StringRef CurArg, + bool GroupedShortOption, unsigned &Index) const { + std::unique_ptr A(GroupedShortOption && getKind() == FlagClass + ? new Arg(*this, CurArg, Index) + : acceptInternal(Args, CurArg, Index)); if (!A) return nullptr; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 771cdfd17aa54..1766e579c33d6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineFeaturesAnalysis.h" +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" @@ -970,6 +971,12 @@ ModulePassManager PassBuilder::buildModuleSimplificationPipeline( if (AttributorRun & AttributorRunOption::MODULE) MPM.addPass(AttributorPass()); + // Lower type metadata and the type.test intrinsic in the ThinLTO + // post link pipeline after ICP. 
This is to enable usage of the type + // tests in ICP sequences. + if (Phase == ThinLTOPhase::PostLink) + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); + // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -1354,6 +1361,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP. + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1420,6 +1430,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO + // pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1547,6 +1561,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). 
@@ -2195,6 +2212,40 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, std::remove_reference::type>()); \ return Error::success(); \ } +#define CGSCC_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \ + return Error::success(); \ + } +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \ + return Error::success(); \ + } +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ + return Error::success(); \ + } +#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging))); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + MPM.addPass( \ + createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ + CREATE_PASS(Params.get()), false, DebugLogging))); \ + return Error::success(); \ + } #include "PassRegistry.def" for (auto &C : ModulePipelineParsingCallbacks) @@ -2278,6 +2329,35 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, std::remove_reference::type>()); \ return Error::success(); \ } +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \ + return Error::success(); \ + } +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) 
\ + return Params.takeError(); \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ + return Error::success(); \ + } +#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging))); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + CGPM.addPass( \ + createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ + CREATE_PASS(Params.get()), false, DebugLogging))); \ + return Error::success(); \ + } #include "PassRegistry.def" for (auto &C : CGSCCPipelineParsingCallbacks) @@ -2361,6 +2441,25 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, std::remove_reference::type>()); \ return Error::success(); \ } +// FIXME: UseMemorySSA is set to false. Maybe we could do things like: +// bool UseMemorySSA = !("canon-freeze" || "loop-predication" || +// "guard-widening"); +// The risk is that it may become obsolete if we're not careful. 
+#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + FPM.addPass( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging)); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \ + false, DebugLogging)); \ + return Error::success(); \ + } #include "PassRegistry.def" for (auto &C : FunctionPipelineParsingCallbacks) @@ -2665,3 +2764,20 @@ bool PassBuilder::isAAPassName(StringRef PassName) { #include "PassRegistry.def" return false; } + +bool PassBuilder::isAnalysisPassName(StringRef PassName) { +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define CGSSC_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#include "PassRegistry.def" + return false; +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index eb2b740db5612..ad20d02436dae 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -133,6 +133,7 @@ FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis()) +FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) @@ -234,6 +235,8 @@ FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", DemandedBitsPrinterPass(dbgs())) 
FUNCTION_PASS("print", DominanceFrontierPrinterPass(dbgs())) FUNCTION_PASS("print", InlineCostAnnotationPrinterPass(dbgs())) +FUNCTION_PASS("print", + InlineSizeEstimatorAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) FUNCTION_PASS("print", MemorySSAPrinterPass(dbgs())) FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) @@ -315,7 +318,7 @@ FUNCTION_PASS_WITH_PARAMS("print", LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis()) LOOP_ANALYSIS("access-info", LoopAccessAnalysis()) LOOP_ANALYSIS("ddg", DDGAnalysis()) -LOOP_ANALYSIS("ivusers", IVUsersAnalysis()) +LOOP_ANALYSIS("iv-users", IVUsersAnalysis()) LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #undef LOOP_ANALYSIS @@ -337,7 +340,7 @@ LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) -LOOP_PASS("print", IVUsersPrinterPass(dbgs())) +LOOP_PASS("print", IVUsersPrinterPass(dbgs())) LOOP_PASS("print", LoopNestPrinterPass(dbgs())) LOOP_PASS("print", LoopCachePrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 71ea44a1a722d..7b97723da60cc 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -522,8 +522,11 @@ class LineConsumer { public: LineConsumer() = default; LineConsumer(StringRef Filename) { + // Open source files without requiring a NUL terminator. The concurrent + // modification may nullify the NUL terminator condition. 
ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(Filename); + MemoryBuffer::getFileOrSTDIN(Filename, -1, + /*RequiresNullTerminator=*/false); if (std::error_code EC = BufferOrErr.getError()) { errs() << Filename << ": " << EC.message() << "\n"; Remaining = ""; diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index b9d8ae9ba60d6..8879674c324d5 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1111,6 +1111,8 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { return true; } +// FIXME: This is to be removed after switching to the new memop value +// profiling. // Parse the value profile options. void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, int64_t &RangeLast) { diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 4a591efb141aa..9a6f93feaa29f 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -3086,7 +3086,8 @@ void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 
-void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { +void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, + unsigned LoadBytes) { assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); uint8_t *Dst = reinterpret_cast( const_cast(IntVal.getRawData())); diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 8dc8c4e9775ac..658c1ee74cfec 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -583,7 +583,7 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static void +static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { @@ -591,51 +591,24 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, return (Features[F / 32] & (1U << (F % 32))) != 0; }; + StringRef CPU; + switch (Family) { case 3: - *Type = X86::INTEL_i386; + CPU = "i386"; break; case 4: - *Type = X86::INTEL_i486; + CPU = "i486"; break; case 5: if (testFeature(X86::FEATURE_MMX)) { - *Type = X86::INTEL_PENTIUM_MMX; + CPU = "pentium-mmx"; break; } - *Type = X86::INTEL_PENTIUM; + CPU = "pentium"; break; case 6: switch (Model) { - case 0x01: // Pentium Pro processor - *Type = X86::INTEL_PENTIUM_PRO; - break; - case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, - // model 03 - case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, - // model 05, and Intel Celeron processor, model 05 - case 0x06: // Celeron processor, model 06 - *Type = X86::INTEL_PENTIUM_II; - break; - case 0x07: // Pentium III processor, model 07, and Pentium III Xeon - // processor, model 07 - case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, - // model 08, and Celeron processor, model 08 - case 0x0a: // Pentium III Xeon processor, model 0Ah - case 0x0b: // Pentium III processor, model 0Bh - *Type = X86::INTEL_PENTIUM_III; - break; - case 0x09: // Intel Pentium M 
processor, Intel Celeron M processor model 09. - case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model - // 0Dh. All processors are manufactured using the 90 nm process. - case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 - // Integrated Processor with Intel QuickAssist Technology - *Type = X86::INTEL_PENTIUM_M; - break; - case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model - // 0Eh. All processors are manufactured using the 65 nm process. - *Type = X86::INTEL_CORE_DUO; - break; // yonah case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad // mobile processor, Intel Core 2 Extreme processor, Intel @@ -643,8 +616,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process - *Type = X86::INTEL_CORE2; // "core2" - *Subtype = X86::INTEL_CORE2_65; + CPU = "core2"; + *Type = X86::INTEL_CORE2; break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. @@ -652,34 +625,38 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // 45nm: Penryn , Wolfdale, Yorkfield (XE) case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. - *Type = X86::INTEL_CORE2; // "penryn" - *Subtype = X86::INTEL_CORE2_45; + CPU = "penryn"; + *Type = X86::INTEL_CORE2; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. 
case 0x1f: - case 0x2e: // Nehalem EX - *Type = X86::INTEL_COREI7; // "nehalem" + case 0x2e: // Nehalem EX + CPU = "nehalem"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_NEHALEM; break; case 0x25: // Intel Core i7, laptop version. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. case 0x2f: // Westmere EX - *Type = X86::INTEL_COREI7; // "westmere" + CPU = "westmere"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_WESTMERE; break; case 0x2a: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 0x2d: - *Type = X86::INTEL_COREI7; //"sandybridge" + CPU = "sandybridge"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: - case 0x3e: // Ivy Bridge EP - *Type = X86::INTEL_COREI7; // "ivybridge" + case 0x3e: // Ivy Bridge EP + CPU = "ivybridge"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_IVYBRIDGE; break; @@ -688,7 +665,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x3f: case 0x45: case 0x46: - *Type = X86::INTEL_COREI7; // "haswell" + CPU = "haswell"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_HASWELL; break; @@ -697,7 +675,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x47: case 0x4f: case 0x56: - *Type = X86::INTEL_COREI7; // "broadwell" + CPU = "broadwell"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_BROADWELL; break; @@ -708,39 +687,47 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x9e: // Kaby Lake desktop case 0xa5: // Comet Lake-H/S case 0xa6: // Comet Lake-U - *Type = X86::INTEL_COREI7; // "skylake" + CPU = "skylake"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SKYLAKE; break; // Skylake Xeon: case 0x55: *Type = X86::INTEL_COREI7; - if (testFeature(X86::FEATURE_AVX512BF16)) - *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" - else if 
(testFeature(X86::FEATURE_AVX512VNNI)) - *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" - else - *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + if (testFeature(X86::FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + *Subtype = X86::INTEL_COREI7_COOPERLAKE; + } else if (testFeature(X86::FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + *Subtype = X86::INTEL_COREI7_CASCADELAKE; + } else { + CPU = "skylake-avx512"; + *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; + } break; // Cannonlake: case 0x66: + CPU = "cannonlake"; *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" + *Subtype = X86::INTEL_COREI7_CANNONLAKE; break; // Icelake: case 0x7d: case 0x7e: + CPU = "icelake-client"; *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" + *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; break; // Icelake Xeon: case 0x6a: case 0x6c: + CPU = "icelake-server"; *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" + *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; break; case 0x1c: // Most 45 nm Intel Atom processors @@ -748,8 +735,9 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview + CPU = "bonnell"; *Type = X86::INTEL_BONNELL; - break; // "bonnell" + break; // Atom Silvermont codes from the Intel software optimization guide. 
case 0x37: @@ -758,14 +746,17 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x5a: case 0x5d: case 0x4c: // really airmont + CPU = "silvermont"; *Type = X86::INTEL_SILVERMONT; - break; // "silvermont" + break; // Goldmont: case 0x5c: // Apollo Lake case 0x5f: // Denverton + CPU = "goldmont"; *Type = X86::INTEL_GOLDMONT; - break; // "goldmont" + break; case 0x7a: + CPU = "goldmont-plus"; *Type = X86::INTEL_GOLDMONT_PLUS; break; case 0x86: @@ -773,193 +764,140 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; case 0x57: - *Type = X86::INTEL_KNL; // knl + CPU = "tremont"; + *Type = X86::INTEL_KNL; break; case 0x85: - *Type = X86::INTEL_KNM; // knm + CPU = "knm"; + *Type = X86::INTEL_KNM; break; default: // Unknown family 6 CPU, try to guess. + // Don't both with Type/Subtype here, they aren't used by the caller. + // They're used above to keep the code in sync with compiler-rt. // TODO detect tigerlake host from model if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_TIGERLAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512VBMI2)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; - break; - } - - if (testFeature(X86::FEATURE_AVX512VBMI)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_CANNONLAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512BF16)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_COOPERLAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512VNNI)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_CASCADELAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512VL)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; - break; - } - - if (testFeature(X86::FEATURE_AVX512ER)) { - *Type = X86::INTEL_KNL; // knl - break; + CPU = "tigerlake"; + } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { + CPU = "icelake-client"; + } else if 
(testFeature(X86::FEATURE_AVX512VBMI)) { + CPU = "cannonlake"; + } else if (testFeature(X86::FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + } else if (testFeature(X86::FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + } else if (testFeature(X86::FEATURE_AVX512VL)) { + CPU = "skylake-avx512"; + } else if (testFeature(X86::FEATURE_AVX512ER)) { + CPU = "knl"; + } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { + if (testFeature(X86::FEATURE_SHA)) + CPU = "goldmont"; + else + CPU = "skylake"; + } else if (testFeature(X86::FEATURE_ADX)) { + CPU = "broadwell"; + } else if (testFeature(X86::FEATURE_AVX2)) { + CPU = "haswell"; + } else if (testFeature(X86::FEATURE_AVX)) { + CPU = "sandybridge"; + } else if (testFeature(X86::FEATURE_SSE4_2)) { + if (testFeature(X86::FEATURE_MOVBE)) + CPU = "silvermont"; + else + CPU = "nehalem"; + } else if (testFeature(X86::FEATURE_SSE4_1)) { + CPU = "penryn"; + } else if (testFeature(X86::FEATURE_SSSE3)) { + if (testFeature(X86::FEATURE_MOVBE)) + CPU = "bonnell"; + else + CPU = "core2"; + } else if (testFeature(X86::FEATURE_64BIT)) { + CPU = "core2"; + } else if (testFeature(X86::FEATURE_SSE3)) { + CPU = "yonah"; + } else if (testFeature(X86::FEATURE_SSE2)) { + CPU = "pentium-m"; + } else if (testFeature(X86::FEATURE_SSE)) { + CPU = "pentium3"; + } else if (testFeature(X86::FEATURE_MMX)) { + CPU = "pentium2"; + } else { + CPU = "pentiumpro"; } - - if (testFeature(X86::FEATURE_CLFLUSHOPT)) { - if (testFeature(X86::FEATURE_SHA)) { - *Type = X86::INTEL_GOLDMONT; - } else { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_SKYLAKE; - } - break; - } - if (testFeature(X86::FEATURE_ADX)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_BROADWELL; - break; - } - if (testFeature(X86::FEATURE_AVX2)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_HASWELL; - break; - } - if (testFeature(X86::FEATURE_AVX)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; - break; - } - if 
(testFeature(X86::FEATURE_SSE4_2)) { - if (testFeature(X86::FEATURE_MOVBE)) { - *Type = X86::INTEL_SILVERMONT; - } else { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_NEHALEM; - } - break; - } - if (testFeature(X86::FEATURE_SSE4_1)) { - *Type = X86::INTEL_CORE2; // "penryn" - *Subtype = X86::INTEL_CORE2_45; - break; - } - if (testFeature(X86::FEATURE_SSSE3)) { - if (testFeature(X86::FEATURE_MOVBE)) { - *Type = X86::INTEL_BONNELL; // "bonnell" - } else { - *Type = X86::INTEL_CORE2; // "core2" - *Subtype = X86::INTEL_CORE2_65; - } - break; - } - if (testFeature(X86::FEATURE_64BIT)) { - *Type = X86::INTEL_CORE2; // "core2" - *Subtype = X86::INTEL_CORE2_65; - break; - } - if (testFeature(X86::FEATURE_SSE3)) { - *Type = X86::INTEL_CORE_DUO; - break; - } - if (testFeature(X86::FEATURE_SSE2)) { - *Type = X86::INTEL_PENTIUM_M; - break; - } - if (testFeature(X86::FEATURE_SSE)) { - *Type = X86::INTEL_PENTIUM_III; - break; - } - if (testFeature(X86::FEATURE_MMX)) { - *Type = X86::INTEL_PENTIUM_II; - break; - } - *Type = X86::INTEL_PENTIUM_PRO; break; } break; case 15: { if (testFeature(X86::FEATURE_64BIT)) { - *Type = X86::INTEL_NOCONA; + CPU = "nocona"; break; } if (testFeature(X86::FEATURE_SSE3)) { - *Type = X86::INTEL_PRESCOTT; + CPU = "prescott"; break; } - *Type = X86::INTEL_PENTIUM_IV; + CPU = "pentium4"; break; } default: - break; /*"generic"*/ + break; // Unknown. } + + return CPU; } -static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { +static StringRef +getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { auto testFeature = [&](unsigned F) { return (Features[F / 32] & (1U << (F % 32))) != 0; }; - // FIXME: this poorly matches the generated SubtargetFeatureKV table. 
There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. + StringRef CPU; + switch (Family) { case 4: - *Type = X86::AMD_i486; + CPU = "i486"; break; case 5: - *Type = X86::AMDPENTIUM; + CPU = "pentium"; switch (Model) { case 6: case 7: - *Subtype = X86::AMDPENTIUM_K6; - break; // "k6" + CPU = "k6"; + break; case 8: - *Subtype = X86::AMDPENTIUM_K62; - break; // "k6-2" + CPU = "k6-2"; + break; case 9: case 13: - *Subtype = X86::AMDPENTIUM_K63; - break; // "k6-3" + CPU = "k6-3"; + break; case 10: - *Subtype = X86::AMDPENTIUM_GEODE; - break; // "geode" + CPU = "geode"; + break; } break; case 6: if (testFeature(X86::FEATURE_SSE)) { - *Type = X86::AMD_ATHLON_XP; - break; // "athlon-xp" + CPU = "athlon-xp"; + break; } - *Type = X86::AMD_ATHLON; - break; // "athlon" + CPU = "athlon"; + break; case 15: if (testFeature(X86::FEATURE_SSE3)) { - *Type = X86::AMD_K8SSE3; - break; // "k8-sse3" + CPU = "k8-sse3"; + break; } - *Type = X86::AMD_K8; - break; // "k8" + CPU = "k8"; + break; case 16: + CPU = "amdfam10"; *Type = X86::AMDFAM10H; // "amdfam10" switch (Model) { case 2: @@ -974,44 +912,54 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, } break; case 20: + CPU = "btver1"; *Type = X86::AMD_BTVER1; - break; // "btver1"; + break; case 21: + CPU = "bdver1"; *Type = X86::AMDFAM15H; if (Model >= 0x60 && Model <= 0x7f) { + CPU = "bdver4"; *Subtype = X86::AMDFAM15H_BDVER4; - break; // "bdver4"; 60h-7Fh: Excavator + break; // 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { + CPU = "bdver3"; *Subtype = X86::AMDFAM15H_BDVER3; - break; // "bdver3"; 30h-3Fh: Steamroller + break; // 30h-3Fh: Steamroller } if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + CPU = "bdver2"; *Subtype = X86::AMDFAM15H_BDVER2; - break; // "bdver2"; 02h, 10h-1Fh: Piledriver + break; // 02h, 10h-1Fh: Piledriver } if (Model <= 0x0f) { *Subtype = X86::AMDFAM15H_BDVER1; - break; // "bdver1"; 
00h-0Fh: Bulldozer + break; // 00h-0Fh: Bulldozer } break; case 22: + CPU = "btver2"; *Type = X86::AMD_BTVER2; - break; // "btver2" + break; case 23: + CPU = "znver1"; *Type = X86::AMDFAM17H; if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { + CPU = "znver2"; *Subtype = X86::AMDFAM17H_ZNVER2; - break; // "znver2"; 30h-3fh, 71h: Zen2 + break; // 30h-3fh, 71h: Zen2 } if (Model <= 0x0f) { *Subtype = X86::AMDFAM17H_ZNVER1; - break; // "znver1"; 00h-0Fh: Zen1 + break; // 00h-0Fh: Zen1 } break; default: - break; // "generic" + break; // Unknown AMD CPU. } + + return CPU; } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, @@ -1161,26 +1109,23 @@ StringRef sys::getHostCPUName() { detectX86FamilyModel(EAX, &Family, &Model); getAvailableFeatures(ECX, EDX, MaxLeaf, Features); + // These aren't consumed in this file, but we try to keep some source code the + // same or similar to compiler-rt. unsigned Type = 0; unsigned Subtype = 0; + StringRef CPU; + if (Vendor == SIG_INTEL) { - getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); + CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, + &Subtype); } else if (Vendor == SIG_AMD) { - getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); + CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, + &Subtype); } - // Check subtypes first since those are more specific. -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ - if (Subtype == X86::ENUM) \ - return ARCHNAME; -#include "llvm/Support/X86TargetParser.def" - - // Now check types. 
-#define X86_CPU_TYPE(ARCHNAME, ENUM) \ - if (Type == X86::ENUM) \ - return ARCHNAME; -#include "llvm/Support/X86TargetParser.def" + if (!CPU.empty()) + return CPU; return "generic"; } diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index be9b541237c74..031384ebaa91c 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/TargetParser.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/ARMBuildAttributes.h" using namespace llvm; using namespace AMDGPU; @@ -208,3 +209,64 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { default: return {0, 0, 0}; } } + +namespace llvm { +namespace RISCV { + +struct CPUInfo { + StringLiteral Name; + CPUKind Kind; + unsigned Features; + StringLiteral DefaultMarch; + bool is64Bit() const { return (Features & FK_64BIT); } +}; + +constexpr CPUInfo RISCVCPUInfo[] = { +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ + {NAME, CK_##ENUM, FEATURES, DEFAULT_MARCH}, +#include "llvm/Support/RISCVTargetParser.def" +}; + +bool checkCPUKind(CPUKind Kind, bool IsRV64) { + if (Kind == CK_INVALID) + return false; + return RISCVCPUInfo[static_cast(Kind)].is64Bit() == IsRV64; +} + +CPUKind parseCPUKind(StringRef CPU) { + return llvm::StringSwitch(CPU) +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) +#include "llvm/Support/RISCVTargetParser.def" + .Default(CK_INVALID); +} + +StringRef getMArchFromMcpu(StringRef CPU) { + CPUKind Kind = parseCPUKind(CPU); + return RISCVCPUInfo[static_cast(Kind)].DefaultMarch; +} + +void fillValidCPUArchList(SmallVectorImpl &Values, bool IsRV64) { + for (const auto &C : RISCVCPUInfo) { + if (C.Kind != CK_INVALID && IsRV64 == C.is64Bit()) + 
Values.emplace_back(C.Name); + } +} + +// Get all features except standard extension feature +bool getCPUFeaturesExceptStdExt(CPUKind Kind, + std::vector &Features) { + unsigned CPUFeatures = RISCVCPUInfo[static_cast(Kind)].Features; + + if (CPUFeatures == FK_INVALID) + return false; + + if (CPUFeatures & FK_64BIT) + Features.push_back("+64bit"); + else + Features.push_back("-64bit"); + + return true; +} + +} // namespace RISCV +} // namespace llvm diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 399a0cc7a25c5..e352beb77616b 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -957,9 +957,9 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &IT, return EC; // Convert path to the format that Windows is happy with. - if (PathUTF16.size() > 0 && - !is_separator(PathUTF16[Path.size() - 1]) && - PathUTF16[Path.size() - 1] != L':') { + size_t PathUTF16Len = PathUTF16.size(); + if (PathUTF16Len > 0 && !is_separator(PathUTF16[PathUTF16Len - 1]) && + PathUTF16[PathUTF16Len - 1] != L':') { PathUTF16.push_back(L'\\'); PathUTF16.push_back(L'*'); } else { diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 1e39db5a984a6..aa41cae289e8b 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -79,6 +79,6 @@ def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>; def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", [erase_undef_store, combines_for_extload, - shuffle_vector_pseudos]> { + sext_already_extended, shuffle_vector_pseudos]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bd76855f7c644..efa3fd5ca9cef 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp 
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1818,10 +1818,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( bool CanUseBP = RegInfo->hasBasePointer(MF); if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. UseFP = PreferFP; - else if (!CanUseBP) { // Can't use BP. Forced to use FP. - assert(!SVEStackSize && "Expected BP to be available"); + else if (!CanUseBP) // Can't use BP. Forced to use FP. UseFP = true; - } // else we can use BP and FP, but the offset from FP won't fit. // That will make us scavenge registers which we can probably avoid by // using BP. If it won't fit for BP either, we'll scavenge anyway. diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 0134d3f2d88a4..9d0a6d9eaf255 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -24,8 +24,9 @@ class AArch64FrameLowering : public TargetFrameLowering { : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), true /*StackRealignable*/) {} - void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) const; + void + emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 65ccc18ed6013..dae347cd8c2b9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -948,7 +948,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::FADD, VT, Custom); + setOperationAction(ISD::FDIV, VT, Custom); 
setOperationAction(ISD::FMA, VT, Custom); + setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FSUB, VT, Custom); } } @@ -963,12 +967,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addTypeForFixedLengthSVE(VT); // 64bit results can mean a bigger than NEON input. - for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) + for (auto VT : {MVT::v8i8, MVT::v4i16}) setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom); // 128bit results imply a bigger than NEON input. for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(ISD::TRUNCATE, VT, Custom); + for (auto VT : {MVT::v8f16, MVT::v4f32}) + setOperationAction(ISD::FP_ROUND, VT, Expand); } } @@ -1480,11 +1487,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FADD_PRED) MAKE_CASE(AArch64ISD::FADDA_PRED) MAKE_CASE(AArch64ISD::FADDV_PRED) + MAKE_CASE(AArch64ISD::FDIV_PRED) MAKE_CASE(AArch64ISD::FMA_PRED) MAKE_CASE(AArch64ISD::FMAXV_PRED) MAKE_CASE(AArch64ISD::FMAXNMV_PRED) MAKE_CASE(AArch64ISD::FMINV_PRED) MAKE_CASE(AArch64ISD::FMINNMV_PRED) + MAKE_CASE(AArch64ISD::FMUL_PRED) + MAKE_CASE(AArch64ISD::FSUB_PRED) MAKE_CASE(AArch64ISD::NOT) MAKE_CASE(AArch64ISD::BIT) MAKE_CASE(AArch64ISD::CBZ) @@ -2712,13 +2722,19 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { bool IsStrict = Op->isStrictFPOpcode(); SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); - if (SrcVal.getValueType() != MVT::f128) { + EVT SrcVT = SrcVal.getValueType(); + + if (SrcVT != MVT::f128) { + // Expand cases where the input is a vector bigger than NEON. 
+ if (useSVEForFixedLengthVectorVT(SrcVT)) + return SDValue(); + // It's legal except when f128 is involved return Op; } RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType()); + LC = RTLIB::getFPROUND(SrcVT, Op.getValueType()); // FP_ROUND node has a second operand indicating whether it is known to be // precise. That doesn't take part in the LibCall so we can't directly use @@ -3459,16 +3475,23 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::FADD: - if (useSVEForFixedLengthVectorVT(Op.getValueType())) + if (Op.getValueType().isScalableVector() || + useSVEForFixedLengthVectorVT(Op.getValueType())) return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED); return LowerF128Call(Op, DAG, RTLIB::ADD_F128); case ISD::FSUB: + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED); return LowerF128Call(Op, DAG, RTLIB::SUB_F128); case ISD::FMUL: + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED); return LowerF128Call(Op, DAG, RTLIB::MUL_F128); case ISD::FMA: return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED); case ISD::FDIV: + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED); return LowerF128Call(Op, DAG, RTLIB::DIV_F128); case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4fe77481706b3..982dbc86d1694 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -75,9 +75,12 @@ enum NodeType : unsigned { // Arithmetic instructions ADD_PRED, FADD_PRED, + FDIV_PRED, + FMA_PRED, + FMUL_PRED, + FSUB_PRED, SDIV_PRED, UDIV_PRED, - FMA_PRED, SMIN_MERGE_OP1, UMIN_MERGE_OP1, SMAX_MERGE_OP1, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 28a54e6f7d79f..1d7b774f2ee43 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -175,7 +175,10 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [ // Predicated operations with the result of inactive lanes being unspecified. def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; +def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>; +def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; +def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; @@ -361,6 +364,9 @@ let Predicates = [HasSVE] in { defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">; defm FADD_ZPZZ : sve_fp_bin_pred_hfd; + defm FSUB_ZPZZ : sve_fp_bin_pred_hfd; + defm FMUL_ZPZZ : sve_fp_bin_pred_hfd; + defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; @@ -377,10 +383,10 @@ let Predicates = [HasSVE] in { defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd; } - defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>; - defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; + defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", 
int_aarch64_sve_ftsmul_x>; defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>; defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>; @@ -404,8 +410,14 @@ let Predicates = [HasSVE] in { // regalloc. def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)), (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; + def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)), + (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; + def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)), + (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)), (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>; + def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)), + (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>; def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)), (FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>; @@ -1109,6 +1121,20 @@ multiclass sve_prefetch; defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; + // Extract lo/hi halves of legal predicate types. 
+ def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_S PPR:$Ps, (PFALSE))>; + def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))), + (ZIP2_PPP_S PPR:$Ps, (PFALSE))>; + def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_H PPR:$Ps, (PFALSE))>; + def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))), + (ZIP2_PPP_H PPR:$Ps, (PFALSE))>; + def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_B PPR:$Ps, (PFALSE))>; + def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), + (ZIP2_PPP_B PPR:$Ps, (PFALSE))>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index a63b9a97ada55..b0cef9b66e017 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -453,7 +453,11 @@ void AArch64PassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(1, true, true, false, true)); + addPass(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .sinkCommonInsts(true))); // Run LoopDataPrefetch // diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 860c9c20044c5..0ac09c4f96f04 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2844,6 +2844,7 @@ static const struct Extension { {"tlb-rmi", {AArch64::FeatureTLB_RMI}}, {"pan-rwv", {AArch64::FeaturePAN_RWV}}, {"ccpp", {AArch64::FeatureCCPP}}, + {"rcpc", {AArch64::FeatureRCPC}}, {"sve", {AArch64::FeatureSVE}}, {"sve2", {AArch64::FeatureSVE2}}, {"sve2-aes", {AArch64::FeatureSVE2AES}}, diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index a005d1e65abe1..ee36ac0168003 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -340,6 +340,12 @@ class SVE_2_Op_Pat; +class SVE_2_Op_Pred_All_Active +: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)), + (inst $Op1, $Op2)>; + class SVE_2_Op_Pat_Reduce_To_Neon : Pat<(vtd (op vt1:$Op1, vt2:$Op2)), @@ -1665,7 +1671,8 @@ class sve_fp_3op_u_zd sz, bits<3> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zd; } -multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op> { +multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op, + SDPatternOperator predicated_op = null_frag> { def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; @@ -1674,6 +1681,9 @@ multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op> { def : SVE_2_Op_Pat(NAME # _S)>; def : 
SVE_2_Op_Pat(NAME # _D)>; + def : SVE_2_Op_Pred_All_Active(NAME # _H)>; + def : SVE_2_Op_Pred_All_Active(NAME # _S)>; + def : SVE_2_Op_Pred_All_Active(NAME # _D)>; } multiclass sve_fp_3op_u_zd_ftsmul opc, string asm, SDPatternOperator op> { @@ -7804,7 +7814,10 @@ multiclass sve_fp_bin_pred_hfd { def _UNDEF_D : PredTwoOpPseudo; def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_H)>; def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat(NAME # _UNDEF_D)>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index aaf448346b533..5539f4e8699db 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -994,7 +994,7 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { static const unsigned OpcMap[2][2][2] = { {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32}, - {AMDGPU::V_SUB_I32_e32, AMDGPU::V_ADD_I32_e32}}, + {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}}, {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32}, {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}}; @@ -1073,7 +1073,7 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { } if (IsVALU) { - unsigned Opc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; CurDAG->SelectNodeTo( N, Opc, N->getVTList(), @@ -1190,7 +1190,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, Opnds.push_back(Addr.getOperand(1)); // FIXME: Select to VOP3 version for with-carry. 
- unsigned SubOp = AMDGPU::V_SUB_I32_e32; + unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32; if (Subtarget->hasAddNoCarry()) { SubOp = AMDGPU::V_SUB_U32_e64; Opnds.push_back( @@ -1269,7 +1269,7 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, SmallVector Opnds; Opnds.push_back(Zero); Opnds.push_back(Addr.getOperand(1)); - unsigned SubOp = AMDGPU::V_SUB_I32_e32; + unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32; if (Subtarget->hasAddNoCarry()) { SubOp = AMDGPU::V_SUB_U32_e64; Opnds.push_back( @@ -1688,33 +1688,27 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, } else { // If the offset doesn't fit, put the low bits into the offset field and // add the rest. + // + // For a FLAT instruction the hardware decides whether to access + // global/scratch/shared memory based on the high bits of vaddr, + // ignoring the offset field, so we have to ensure that when we add + // remainder to vaddr it still points into the same underlying object. + // The easiest way to do that is to make sure that we split the offset + // into two pieces that are both >= 0 or both <= 0. SDLoc DL(N); - uint64_t ImmField; + uint64_t RemainderOffset = COffsetVal; + uint64_t ImmField = 0; const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned); if (IsSigned) { - ImmField = SignExtend64(COffsetVal, NumBits); - - // Don't use a negative offset field if the base offset is positive. - // Since the scheduler currently relies on the offset field, doing so - // could result in strange scheduling decisions. - - // TODO: Should we not do this in the opposite direction as well? - if (static_cast(COffsetVal) > 0) { - if (static_cast(ImmField) < 0) { - const uint64_t OffsetMask = - maskTrailingOnes(NumBits - 1); - ImmField = COffsetVal & OffsetMask; - } - } - } else { - // TODO: Should we do this for a negative offset? 
- const uint64_t OffsetMask = maskTrailingOnes(NumBits); - ImmField = COffsetVal & OffsetMask; + // Use signed division by a power of two to truncate towards 0. + int64_t D = 1LL << (NumBits - 1); + RemainderOffset = (static_cast(COffsetVal) / D) * D; + ImmField = COffsetVal - RemainderOffset; + } else if (static_cast(COffsetVal) >= 0) { + ImmField = COffsetVal & maskTrailingOnes(NumBits); + RemainderOffset = COffsetVal - ImmField; } - - uint64_t RemainderOffset = COffsetVal - ImmField; - assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned)); assert(RemainderOffset + ImmField == COffsetVal); @@ -1739,7 +1733,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); SDNode *Add = - CurDAG->getMachineNode(AMDGPU::V_ADD_I32_e64, DL, VTs, + CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs, {AddOffsetLo, SDValue(N0Lo, 0), Clamp}); SDNode *Addc = CurDAG->getMachineNode( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9f49136c986f9..940ec6f31c698 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3462,24 +3462,24 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C ISD::CondCode CCOpcode = cast(Cond.getOperand(2))->get(); SDValue CmpLHS = Cond.getOperand(0); - unsigned Opc = isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : - AMDGPUISD::FFBH_U32; - // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x if (CCOpcode == ISD::SETEQ && (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && - RHS.getOperand(0) == CmpLHS && - isNegativeOne(LHS)) { + RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) { + unsigned Opc = + isCttzOpc(RHS.getOpcode()) ? 
AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; return getFFBX_U32(DAG, CmpLHS, SL, Opc); } // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x if (CCOpcode == ISD::SETNE && - (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && - LHS.getOperand(0) == CmpLHS && - isNegativeOne(RHS)) { + (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) && + LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) { + unsigned Opc = + isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; + return getFFBX_U32(DAG, CmpLHS, SL, Opc); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index c3d5e78964c87..74e6f0c438b2d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -365,7 +365,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; + const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64; Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass()); MachineInstr *Add @@ -403,7 +403,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { } else { const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); Register CarryReg = MRI->createVirtualRegister(CarryRC); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo) .addDef(CarryReg) .add(Lo1) .add(Lo2) @@ -449,7 +449,7 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE( // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned // carry out despite the _i32 name. These were renamed in VI to _U32. // FIXME: We should probably rename the opcodes here. 
- unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc)); I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); @@ -891,6 +891,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { return selectDivScale(I); case Intrinsic::amdgcn_icmp: return selectIntrinsicIcmp(I); + case Intrinsic::amdgcn_ballot: + return selectBallot(I); default: return selectImpl(I, *CoverageInfo); } @@ -1039,6 +1041,38 @@ bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const { return Ret; } +bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + const DebugLoc &DL = I.getDebugLoc(); + Register DstReg = I.getOperand(0).getReg(); + const unsigned Size = MRI->getType(DstReg).getSizeInBits(); + const bool Is64 = Size == 64; + + if (Size != STI.getWavefrontSize()) + return false; + + Optional Arg = + getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true); + + if (Arg.hasValue()) { + const int64_t Value = Arg.getValue().Value; + if (Value == 0) { + unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; + BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0); + } else if (Value == -1) { // all ones + Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO; + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg); + } else + return false; + } else { + Register SrcReg = I.getOperand(2).getReg(); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg); + } + + I.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const { // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick // SelectionDAG uses for wave32 vs wave64. 
@@ -2773,6 +2807,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectG_PTR_ADD(I); case TargetOpcode::G_IMPLICIT_DEF: return selectG_IMPLICIT_DEF(I); + case TargetOpcode::G_FREEZE: + return selectCOPY(I); case TargetOpcode::G_INSERT: return selectG_INSERT(I); case TargetOpcode::G_INTRINSIC: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index f8a8b5db4b556..1fe80958917d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -107,6 +107,7 @@ class AMDGPUInstructionSelector : public InstructionSelector { bool selectInterpP1F16(MachineInstr &MI) const; bool selectDivScale(MachineInstr &MI) const; bool selectIntrinsicIcmp(MachineInstr &MI) const; + bool selectBallot(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const; bool selectEndCfIntrinsic(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0802f2a2d08a7..92ff345cd78c4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -648,6 +648,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.has16BitInsts()) IToFP.legalFor({{S16, S16}}); IToFP.clampScalar(1, S32, S64) + .minScalar(0, S32) .scalarize(0) .widenScalarToNextPow2(1); @@ -1427,6 +1428,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, SextInReg.lowerFor({{S32}, {S64}}); } + // FIXME: Placeholder rule. Really depends on whether the clamp modifier is + // available, and is selectively legal for s16, s32, v2s16. 
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT, G_UADDSAT, G_USUBSAT}) + .scalarize(0) + .clampScalar(0, S16, S32); + SextInReg .scalarize(0) .clampScalar(0, S32, S64) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 56bc0c44779d8..be4f3e1be0386 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2989,6 +2989,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 3); // Index return; } + case Intrinsic::amdgcn_ballot: case Intrinsic::amdgcn_interp_p1: case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: @@ -3360,7 +3361,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (MI.isCopy()) { + if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) { // The default logic bothers to analyze impossible alternative mappings. We // want the most straightforward mapping, so just directly handle this. const RegisterBank *DstBank = getRegBank(MI.getOperand(0).getReg(), MRI, @@ -3376,9 +3377,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getInvalidInstructionMapping(); const ValueMapping &ValMap = getValueMapping(0, Size, *DstBank); + unsigned OpdsMappingSize = MI.isCopy() ? 
1 : 2; + SmallVector OpdsMapping(OpdsMappingSize); + OpdsMapping[0] = &ValMap; + if (MI.getOpcode() == AMDGPU::G_FREEZE) + OpdsMapping[1] = &ValMap; + return getInstructionMapping( 1, /*Cost*/ 1, - /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1); + /*OperandsMapping*/ getOperandsMapping(OpdsMapping), OpdsMappingSize); } if (MI.isRegSequence()) { @@ -4160,6 +4167,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32); break; } + case Intrinsic::amdgcn_ballot: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize); + break; + } } break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 2849645863a57..213788ae0f67b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -411,11 +411,8 @@ std::pair AMDGPUSubtarget::getWavesPerEU( unsigned MinImpliedByFlatWorkGroupSize = getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second); Default.first = MinImpliedByFlatWorkGroupSize; - bool RequestedFlatWorkGroupSize = false; - - if (F.hasFnAttribute("amdgpu-flat-work-group-size")) { - RequestedFlatWorkGroupSize = true; - } + bool RequestedFlatWorkGroupSize = + F.hasFnAttribute("amdgpu-flat-work-group-size"); // Requested minimum/maximum number of waves per execution unit. std::pair Requested = AMDGPU::getIntegerPairAttribute( @@ -427,9 +424,7 @@ std::pair AMDGPUSubtarget::getWavesPerEU( // Make sure requested values do not violate subtarget's specifications. 
if (Requested.first < getMinWavesPerEU() || - Requested.first > getMaxWavesPerEU()) - return Default; - if (Requested.second > getMaxWavesPerEU()) + Requested.second > getMaxWavesPerEU()) return Default; // Make sure requested values are compatible with values implied by requested diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c833bfbcf9366..6b23830491235 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1041,10 +1041,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, return CIInsts; } - bool hasSMovFedHazard() const { - return getGeneration() == AMDGPUSubtarget::GFX9; - } - bool hasReadM0MovRelInterpHazard() const { return getGeneration() == AMDGPUSubtarget::GFX9; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8604f5005eb2b..b4b10835837cd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -787,10 +787,15 @@ void AMDGPUPassConfig::addCodeGenPrepare() { if (EnableLoadStoreVectorizer) addPass(createLoadStoreVectorizerPass()); + + // LowerSwitch pass may introduce unreachable blocks that can + // cause unexpected behavior for subsequent passes. Placing it + // here seems better that these blocks would get cleaned up by + // UnreachableBlockElim inserted next in the pass flow. 
+ addPass(createLowerSwitchPass()); } bool AMDGPUPassConfig::addPreISel() { - addPass(createLowerSwitchPass()); addPass(createFlattenCFGPass()); return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 542a5f006c0f7..9ca851c4d7463 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -452,8 +452,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // implementation tries to generate legalize and scalarization costs. Maybe // we could hoist the scalarization code here? return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput, - Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + Opd1Info, Opd2Info, Opd1PropInfo, + Opd2PropInfo, Args, CxtI); } // Legalize the type. @@ -506,9 +506,20 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // i32 return QuarterRateCost * NElts * LT.first; } + case ISD::FMUL: + // Check possible fuse {fadd|fsub}(a,fmul(b,c)) and return zero cost for + // fmul(b,c) supposing the fadd|fsub will get estimated cost for the whole + // fused operation. 
+ if (!HasFP32Denormals && SLT == MVT::f32 && CxtI && CxtI->hasOneUse()) + if (const auto *FAdd = dyn_cast(*CxtI->user_begin())) { + const int OPC = TLI->InstructionOpcodeToISD(FAdd->getOpcode()); + if (OPC == ISD::FADD || OPC == ISD::FSUB) { + return TargetTransformInfo::TCC_Free; + } + } + LLVM_FALLTHROUGH; case ISD::FADD: case ISD::FSUB: - case ISD::FMUL: if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(); @@ -568,9 +579,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, break; } - return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, - Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args, CxtI); } // Return true if there's a potential benefit from using v2f16 instructions for diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 418296684d765..3c375e0575255 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -187,7 +187,7 @@ static BasicBlock *unifyReturnBlockSet(Function &F, for (BasicBlock *BB : ReturningBlocks) { // Cleanup possible branch to unconditional branch to the return. 
- simplifyCFG(BB, TTI, {2}); + simplifyCFG(BB, TTI, SimplifyCFGOptions().bonusInstThreshold(2)); } return NewRetBlock; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 013b7a0cf25d1..9f3a6ffc35e6f 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -339,7 +339,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } - bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } + bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } @@ -1295,7 +1295,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); - OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); + OperandMatchResultTy parseDfmtNfmt(int64_t &Format); + OperandMatchResultTy parseUfmt(int64_t &Format); + OperandMatchResultTy parseFORMAT(OperandVector &Operands); + bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } @@ -3239,8 +3242,8 @@ static bool IsRevOpcode(const unsigned Opcode) case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: case AMDGPU::V_SUBREV_F32_e64_vi: - case AMDGPU::V_SUBREV_I32_e32: - case AMDGPU::V_SUBREV_I32_e64: + case AMDGPU::V_SUBREV_CO_U32_e32: + case AMDGPU::V_SUBREV_CO_U32_e64: case 
AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: @@ -4870,50 +4873,96 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { return MatchOperand_Success; } +//===----------------------------------------------------------------------===// +// MTBUF format +//===----------------------------------------------------------------------===// + +bool AMDGPUAsmParser::tryParseFmt(const char *Pref, + int64_t MaxVal, + int64_t &Fmt) { + int64_t Val; + SMLoc Loc = getLoc(); + + auto Res = parseIntWithPrefix(Pref, Val); + if (Res == MatchOperand_ParseFail) + return false; + if (Res == MatchOperand_NoMatch) + return true; + + if (Val < 0 || Val > MaxVal) { + Error(Loc, Twine("out of range ", StringRef(Pref))); + return false; + } + + Fmt = Val; + return true; +} + // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their // values to live in a joint format operand in the MCInst encoding. OperandMatchResultTy -AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - int64_t Dfmt = 0, Nfmt = 0; +AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { + using namespace llvm::AMDGPU::MTBUFFormat; + + int64_t Dfmt = DFMT_UNDEF; + int64_t Nfmt = NFMT_UNDEF; + // dfmt and nfmt can appear in either order, and each is optional. 
- bool GotDfmt = false, GotNfmt = false; - while (!GotDfmt || !GotNfmt) { - if (!GotDfmt) { - auto Res = parseIntWithPrefix("dfmt", Dfmt); - if (Res != MatchOperand_NoMatch) { - if (Res != MatchOperand_Success) - return Res; - if (Dfmt >= 16) { - Error(Parser.getTok().getLoc(), "out of range dfmt"); - return MatchOperand_ParseFail; - } - GotDfmt = true; - Parser.Lex(); - continue; - } + for (int I = 0; I < 2; ++I) { + if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) + return MatchOperand_ParseFail; + + if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { + return MatchOperand_ParseFail; } - if (!GotNfmt) { - auto Res = parseIntWithPrefix("nfmt", Nfmt); - if (Res != MatchOperand_NoMatch) { - if (Res != MatchOperand_Success) - return Res; - if (Nfmt >= 8) { - Error(Parser.getTok().getLoc(), "out of range nfmt"); - return MatchOperand_ParseFail; - } - GotNfmt = true; - Parser.Lex(); - continue; - } + // Skip optional comma between dfmt/nfmt + // but guard against 2 commas following each other. + if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && + !peekToken().is(AsmToken::Comma)) { + trySkipToken(AsmToken::Comma); } - break; } - if (!GotDfmt && !GotNfmt) + + if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) return MatchOperand_NoMatch; - auto Format = Dfmt | Nfmt << 4; + + Dfmt = (Dfmt == DFMT_UNDEF)? DFMT_DEFAULT : Dfmt; + Nfmt = (Nfmt == NFMT_UNDEF)? NFMT_DEFAULT : Nfmt; + + Format = encodeDfmtNfmt(Dfmt, Nfmt); + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseUfmt(int64_t &Format) { + using namespace llvm::AMDGPU::MTBUFFormat; + + int64_t Fmt = UFMT_UNDEF; + + if (!tryParseFmt("format", UFMT_MAX, Fmt)) + return MatchOperand_ParseFail; + + if (Fmt == UFMT_UNDEF) + return MatchOperand_NoMatch; + + Format = Fmt; + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { + using namespace llvm::AMDGPU::MTBUFFormat; + + int64_t Format = isGFX10() ? 
UFMT_DEFAULT : DFMT_NFMT_DEFAULT; + OperandMatchResultTy Res; + SMLoc Loc = getLoc(); + + Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); + if (Res == MatchOperand_ParseFail) + return Res; + Operands.push_back( - AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); + AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); return MatchOperand_Success; } @@ -6242,7 +6291,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, - {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, @@ -6327,8 +6375,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) Op.ConvertResult); } else if (Op.Type == AMDGPUOperand::ImmTyDim) { res = parseDim(Operands); - } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { - res = parseDfmtNfmt(Operands); } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); } diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index fa42ddc54b565..370e9db9e83e9 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -168,15 +168,15 @@ class getMTBUFIns vdataList=[]> { class getMTBUFAsmOps { string Pfx = - !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset", + !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc,$format $soffset", !if(!eq(addrKind, BUFAddrKind.OffEn), - "$vaddr, $srsrc, $format, $soffset offen", + "$vaddr, $srsrc,$format $soffset offen", !if(!eq(addrKind, BUFAddrKind.IdxEn), - "$vaddr, $srsrc, $format, $soffset idxen", + "$vaddr, $srsrc,$format $soffset idxen", !if(!eq(addrKind, 
BUFAddrKind.BothEn), - "$vaddr, $srsrc, $format, $soffset idxen offen", + "$vaddr, $srsrc,$format $soffset idxen offen", !if(!eq(addrKind, BUFAddrKind.Addr64), - "$vaddr, $srsrc, $format, $soffset addr64", + "$vaddr, $srsrc,$format $soffset addr64", ""))))); string ret = Pfx # "$offset"; } diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 719a968b83147..10a74bf4c2f7d 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -274,14 +274,14 @@ static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) { default: break; case AMDGPU::V_ADD_U32_e32: case AMDGPU::V_ADD_U32_e64: - case AMDGPU::V_ADD_I32_e32: - case AMDGPU::V_ADD_I32_e64: + case AMDGPU::V_ADD_CO_U32_e32: + case AMDGPU::V_ADD_CO_U32_e64: case AMDGPU::V_OR_B32_e32: case AMDGPU::V_OR_B32_e64: case AMDGPU::V_SUBREV_U32_e32: case AMDGPU::V_SUBREV_U32_e64: - case AMDGPU::V_SUBREV_I32_e32: - case AMDGPU::V_SUBREV_I32_e64: + case AMDGPU::V_SUBREV_CO_U32_e32: + case AMDGPU::V_SUBREV_CO_U32_e64: case AMDGPU::V_MAX_U32_e32: case AMDGPU::V_MAX_U32_e64: case AMDGPU::V_XOR_B32_e32: diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 8482dbfec250b..8b446a4342986 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -191,9 +191,6 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) return NoopHazard; - if (checkAnyInstHazards(MI) > 0) - return NoopHazard; - return NoHazard; } @@ -241,7 +238,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { if (MI->isBundle()) return 0; - int WaitStates = std::max(0, checkAnyInstHazards(MI)); + int WaitStates = 0; if (SIInstrInfo::isSMRD(*MI)) return std::max(WaitStates, checkSMRDHazards(MI)); @@ -821,34 +818,6 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr 
*RFE) { return RFEWaitStates - WaitStatesNeeded; } -int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) { - if (MI->isDebugInstr()) - return 0; - - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - if (!ST.hasSMovFedHazard()) - return 0; - - // Check for any instruction reading an SGPR after a write from - // s_mov_fed_b32. - int MovFedWaitStates = 1; - int WaitStatesNeeded = 0; - - for (const MachineOperand &Use : MI->uses()) { - if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg())) - continue; - auto IsHazardFn = [] (MachineInstr *MI) { - return MI->getOpcode() == AMDGPU::S_MOV_FED_B32; - }; - int WaitStatesNeededForUse = - MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn, - MovFedWaitStates); - WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); - } - - return WaitStatesNeeded; -} - int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); const int SMovRelWaitStates = 1; @@ -930,10 +899,12 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { return false; }; - auto IsExpiredFn = [] (MachineInstr *MI, int) { + auto IsExpiredFn = [](MachineInstr *MI, int) { return MI && (SIInstrInfo::isVALU(*MI) || (MI->getOpcode() == AMDGPU::S_WAITCNT && - !MI->getOperand(0).getImm())); + !MI->getOperand(0).getImm()) || + (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && + MI->getOperand(0).getImm() == 0xffe3)); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == @@ -941,7 +912,9 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { return false; const SIInstrInfo *TII = ST.getInstrInfo(); - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32)); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0xffe3); return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 
cd17f2755bd10..59d6d4f90755a 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -83,7 +83,6 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkRWLaneHazards(MachineInstr *RWLane); int checkRFEHazards(MachineInstr *RFE); int checkInlineAsmHazards(MachineInstr *IA); - int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); int checkNSAtoVMEMHazard(MachineInstr *MI); int checkFPAtomicToDenormModeHazard(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index fe063d33ea3e0..00bf404dea279 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -299,14 +299,23 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - if (unsigned Val = MI->getOperand(OpNo).getImm()) { - if (AMDGPU::isGFX10(STI)) - O << " format:" << Val; - else { - O << " dfmt:" << (Val & 15); - O << ", nfmt:" << (Val >> 4); - } + using namespace llvm::AMDGPU::MTBUFFormat; + + unsigned Val = MI->getOperand(OpNo).getImm(); + if (AMDGPU::isGFX10(STI)) { + if (Val == UFMT_DEFAULT) + return; + O << " format:" << Val; + } else { + if (Val == DFMT_NFMT_DEFAULT) + return; + unsigned Dfmt; + unsigned Nfmt; + decodeDfmtNfmt(Val, Dfmt, Nfmt); + O << " dfmt:" << Dfmt; + O << ", nfmt:" << Nfmt; } + O << ','; } void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 2f1f4e7a03928..f614705730501 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -47,7 
+47,7 @@ class R600MCCodeEmitter : public MCCodeEmitter { /// Encode the instruction and write it to the OS. void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; /// \returns the encoding for an MCOperand. uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 4f7d255eb450a..9c9dd66a4a79c 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -392,6 +392,48 @@ enum ModeRegisterMasks : uint32_t { } // namespace Hwreg +namespace MTBUFFormat { + +enum DataFormat { + DFMT_MAX = 15, + + DFMT_UNDEF = -1, + DFMT_DEFAULT = 1, + + DFMT_SHIFT = 0, + DFMT_MASK = DFMT_MAX +}; + +enum NumFormat { + NFMT_MAX = 7, + + NFMT_UNDEF = -1, + NFMT_DEFAULT = 0, + + NFMT_SHIFT = 4, + NFMT_MASK = NFMT_MAX +}; + +enum MergedFormat { + DFMT_NFMT_UNDEF = -1, + DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) | + ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT), + + + DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT), + + DFMT_NFMT_MAX = DFMT_NFMT_MASK +}; + +enum UnifiedFormat { + UFMT_MAX = 127, + + UFMT_UNDEF = -1, + UFMT_DEFAULT = 1 +}; + +} // namespace MTBUFFormat + namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32. enum Id : unsigned { // id of symbolic names diff --git a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp index 8e3402b537b3b..abde092c7b84a 100644 --- a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -13,7 +13,7 @@ /// and decompose it into a base and index. 
/// /// Transform: -/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32 +/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_CO_U32_e64 %21:sgpr_32, %22:vgpr_32 /// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32, /// %24:vgpr_32, %19:sreg_64_xexec /// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1 @@ -106,7 +106,7 @@ static bool findSRegBaseAndIndex(MachineOperand *Op, Worklist.push_back(&DefInst->getOperand(1)); Worklist.push_back(&DefInst->getOperand(3)); break; - case AMDGPU::V_ADD_I32_e64: + case AMDGPU::V_ADD_CO_U32_e64: // The V_ADD_* and its analogous V_ADDCV_* are generated by // a previous pass which lowered from an ADD_64_PSEUDO, // which generates subregs to break up the 64 bit args. diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index ffcf4c30bc70d..0986e1efb9840 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -399,9 +399,9 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, return false; if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) { - if ((Opc == AMDGPU::V_ADD_I32_e64 || - Opc == AMDGPU::V_SUB_I32_e64 || - Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME + if ((Opc == AMDGPU::V_ADD_CO_U32_e64 || + Opc == AMDGPU::V_SUB_CO_U32_e64 || + Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d035aa8f72bd7..c2666f3302151 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -449,6 +449,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (Subtarget->has16BitInsts()) { setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); 
setOperationAction(ISD::FLOG, MVT::f16, Custom); setOperationAction(ISD::FEXP, MVT::f16, Custom); setOperationAction(ISD::FLOG10, MVT::f16, Custom); @@ -3849,7 +3850,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm( MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); - unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0) .addReg(CarryReg, RegState::Define) .add(SrcReg0Sub0) @@ -4111,9 +4112,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MI.eraseFromParent(); return BB; } - case AMDGPU::V_ADD_I32_e32: - case AMDGPU::V_SUB_I32_e32: - case AMDGPU::V_SUBREV_I32_e32: { + case AMDGPU::V_ADD_CO_U32_e32: + case AMDGPU::V_SUB_CO_U32_e32: + case AMDGPU::V_SUBREV_CO_U32_e32: { // TODO: Define distinct V_*_I32_Pseudo instructions instead. const DebugLoc &DL = MI.getDebugLoc(); unsigned Opc = MI.getOpcode(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9af8ffedce0f3..5d7cd5ffc4ce7 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -474,27 +474,65 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef BaseOps1, ArrayRef BaseOps2, unsigned NumLoads, unsigned NumBytes) const { - // If current mem ops pair do not have same base pointer, then they cannot be - // clustered. assert(!BaseOps1.empty() && !BaseOps2.empty()); const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); + if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) return false; - // Compute max cluster size based on average number bytes clustered till now, - // and decide based on it, if current mem ops pair can be clustered or not. 
- assert((NumLoads > 0) && (NumBytes > 0) && (NumBytes >= NumLoads) && - "Invalid NumLoads/NumBytes values"); - unsigned MaxNumLoads; - if (NumBytes <= 4 * NumLoads) { - // Loads are dword or smaller (on average). - MaxNumLoads = 5; - } else { - // Loads are bigger than a dword (on average). - MaxNumLoads = 4; - } - return NumLoads <= MaxNumLoads; + const MachineOperand *FirstDst = nullptr; + const MachineOperand *SecondDst = nullptr; + + if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) || + (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) || + (isMIMG(FirstLdSt) && isMIMG(SecondLdSt)) || + (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) { + const unsigned MaxGlobalLoadCluster = 7; + if (NumLoads > MaxGlobalLoadCluster) + return false; + + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); + if (!FirstDst) + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); + if (!SecondDst) + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); + } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); + } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) { + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); + } + + if (!FirstDst || !SecondDst) + return false; + + // Try to limit clustering based on the total number of bytes loaded + // rather than the number of instructions. This is done to help reduce + // register pressure. The method used is somewhat inexact, though, + // because it assumes that all loads in the cluster will load the + // same number of bytes as FirstLdSt. + + // The unit of this value is bytes. + // FIXME: This needs finer tuning. 
+ unsigned LoadClusterThreshold = 16; + + const MachineRegisterInfo &MRI = + FirstLdSt.getParent()->getParent()->getRegInfo(); + + const Register Reg = FirstDst->getReg(); + + const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg) + ? MRI.getRegClass(Reg) + : RI.getPhysRegClass(Reg); + + // FIXME: NumLoads should not be subtracted 1. This is to match behavior + // of clusterNeighboringMemOps which was previously passing cluster length + // less 1. LoadClusterThreshold should be tuned instead. + return ((NumLoads - 1) * (RI.getRegSizeInBits(*DstRC) / 8)) <= + LoadClusterThreshold; } // FIXME: This behaves strangely. If, for example, you have 32 load + stores, @@ -533,6 +571,80 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } +/// Handle copying from SGPR to AGPR, or from AGPR to AGPR. It is not possible +/// to directly copy, so an intermediate VGPR needs to be used. +static void indirectCopyToAGPR(const SIInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, MCRegister DestReg, + MCRegister SrcReg, bool KillSrc, + RegScavenger &RS) { + const SIRegisterInfo &RI = TII.getRegisterInfo(); + + assert(AMDGPU::SReg_32RegClass.contains(SrcReg) || + AMDGPU::AGPR_32RegClass.contains(SrcReg)); + + // First try to find defining accvgpr_write to avoid temporary registers. + for (auto Def = MI, E = MBB.begin(); Def != E; ) { + --Def; + if (!Def->definesRegister(SrcReg, &RI)) + continue; + if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32) + break; + + MachineOperand &DefOp = Def->getOperand(1); + assert(DefOp.isReg() || DefOp.isImm()); + + if (DefOp.isReg()) { + // Check that register source operand is not clobbered before MI. + // Immediate operands are always safe to propagate.
+ bool SafeToPropagate = true; + for (auto I = Def; I != MI && SafeToPropagate; ++I) + if (I->modifiesRegister(DefOp.getReg(), &RI)) + SafeToPropagate = false; + + if (!SafeToPropagate) + break; + + DefOp.setIsKill(false); + } + + BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) + .add(DefOp); + return; + } + + RS.enterBasicBlock(MBB); + RS.forward(MI); + + // Ideally we want to have three registers for a long reg_sequence copy + // to hide 2 waitstates between v_mov_b32 and accvgpr_write. + unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass, + *MBB.getParent()); + + // Registers in the sequence are allocated contiguously so we can just + // use register number to pick one of three round-robin temps. + unsigned RegNo = DestReg % 3; + Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); + if (!Tmp) + report_fatal_error("Cannot scavenge VGPR to copy to AGPR"); + RS.setRegUsed(Tmp); + // Only loop through if there are any free registers left, otherwise + // scavenger may report a fatal error without emergency spill slot + // or spill with the slot. 
+ while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { + Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); + if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) + break; + Tmp = Tmp2; + RS.setRegUsed(Tmp); + } + + TII.copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc); + BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) + .addReg(Tmp, RegState::Kill); +} + void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, @@ -652,75 +764,18 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (RC == &AMDGPU::AGPR_32RegClass) { - assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || - AMDGPU::SReg_32RegClass.contains(SrcReg) || - AMDGPU::AGPR_32RegClass.contains(SrcReg)); - if (!AMDGPU::VGPR_32RegClass.contains(SrcReg)) { - // First try to find defining accvgpr_write to avoid temporary registers. - for (auto Def = MI, E = MBB.begin(); Def != E; ) { - --Def; - if (!Def->definesRegister(SrcReg, &RI)) - continue; - if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32) - break; - - MachineOperand &DefOp = Def->getOperand(1); - assert(DefOp.isReg() || DefOp.isImm()); - - if (DefOp.isReg()) { - // Check that register source operand if not clobbered before MI. - // Immediate operands are always safe to propagate. - bool SafeToPropagate = true; - for (auto I = Def; I != MI && SafeToPropagate; ++I) - if (I->modifiesRegister(DefOp.getReg(), &RI)) - SafeToPropagate = false; - - if (!SafeToPropagate) - break; - - DefOp.setIsKill(false); - } - - BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) - .add(DefOp); - return; - } - RegScavenger RS; - RS.enterBasicBlock(MBB); - RS.forward(MI); - - // Ideally we want to have three registers for a long reg_sequence copy - // to hide 2 waitstates between v_mov_b32 and accvgpr_write. 
- unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass, - *MBB.getParent()); - - // Registers in the sequence are allocated contiguously so we can just - // use register number to pick one of three round-robin temps. - unsigned RegNo = DestReg % 3; - Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); - if (!Tmp) - report_fatal_error("Cannot scavenge VGPR to copy to AGPR"); - RS.setRegUsed(Tmp); - // Only loop through if there are any free registers left, otherwise - // scavenger may report a fatal error without emergency spill slot - // or spill with the slot. - while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { - unsigned Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); - if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) - break; - Tmp = Tmp2; - RS.setRegUsed(Tmp); - } - copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc); + if (RC == &AMDGPU::AGPR_32RegClass) { + if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) - .addReg(Tmp, RegState::Kill); + .addReg(SrcReg, getKillRegState(KillSrc)); return; } - BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + // FIXME: Pass should maintain scavenger to avoid scan through the block on + // every AGPR spill. + RegScavenger RS; + indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS); return; } @@ -4079,17 +4134,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; } case AMDGPU::S_ADD_I32: - return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32; + return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; case AMDGPU::S_SUB_I32: - return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + return ST.hasAddNoCarry() ? 
AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32; // FIXME: These are not consistently handled, and selected when the carry is // used. case AMDGPU::S_ADD_U32: - return AMDGPU::V_ADD_I32_e32; + return AMDGPU::V_ADD_CO_U32_e32; case AMDGPU::S_SUB_U32: - return AMDGPU::V_SUB_I32_e32; + return AMDGPU::V_SUB_CO_U32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32; case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32; @@ -5046,7 +5101,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0 const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e64), NewVAddrLo) + BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo) .addDef(CondReg0) .addReg(RsrcPtr, 0, AMDGPU::sub0) .addReg(VAddr->getReg(), 0, AMDGPU::sub0) @@ -5376,8 +5431,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, MachineOperand &Src1 = Inst.getOperand(3); unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO) - ? AMDGPU::V_ADD_I32_e64 - : AMDGPU::V_SUB_I32_e64; + ? AMDGPU::V_ADD_CO_U32_e64 + : AMDGPU::V_SUB_CO_U32_e64; const TargetRegisterClass *NewRC = RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg())); Register DestReg = MRI.createVirtualRegister(NewRC); @@ -5626,7 +5681,7 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SubOp = ST.hasAddNoCarry() ? - AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32; + AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32; BuildMI(MBB, MII, DL, get(SubOp), TmpReg) .addImm(0) @@ -5855,7 +5910,7 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist, MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); - unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + unsigned LoOpc = IsAdd ? 
AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; MachineInstr *LoHalf = BuildMI(MBB, MII, DL, get(LoOpc), DestSub0) .addReg(CarryReg, RegState::Define) @@ -6716,7 +6771,7 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC()); - return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } @@ -6737,7 +6792,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, if (!UnusedCarry.isValid()) return MachineInstrBuilder(); - return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7aee52f913605..72feff80ac81c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1115,7 +1115,7 @@ def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>; def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>; -def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>; +def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>; def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 0c4c9e0e9df2b..c0a7116de772b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2213,7 +2213,7 @@ def : GCNPat< def : GCNPat< (add i32:$src0, (i32 NegSubInlineConst32:$src1)), - (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { + 
(V_SUB_CO_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { let SubtargetPredicate = NotHasAddNoCarryInsts; } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 2eb1c52f1b595..110d82412c280 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1667,7 +1667,7 @@ Register SILoadStoreOptimizer::computeBase(MachineInstr &MI, Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineInstr *LoHalf = - BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0) + BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_CO_U32_e64), DestSub0) .addReg(CarryReg, RegState::Define) .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg) .add(OffsetLo) @@ -1730,7 +1730,7 @@ SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) const { // Expecting base computation as: // %OFFSET0:sgpr_32 = S_MOV_B32 8000 // %LO:vgpr_32, %c:sreg_64_xexec = -// V_ADD_I32_e64 %BASE_LO:vgpr_32, %103:sgpr_32, +// V_ADD_CO_U32_e64 %BASE_LO:vgpr_32, %103:sgpr_32, // %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec // %Base:vreg_64 = // REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1 @@ -1752,7 +1752,7 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base MachineInstr *BaseLoDef = MRI->getUniqueVRegDef(BaseLo.getReg()); MachineInstr *BaseHiDef = MRI->getUniqueVRegDef(BaseHi.getReg()); - if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_I32_e64 || + if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_CO_U32_e64 || !BaseHiDef || BaseHiDef->getOpcode() != AMDGPU::V_ADDC_U32_e64) return; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 7eb1ec941dbd7..8af00fcf62a82 100644 --- 
a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -168,6 +168,11 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, And->getOperand(0).getReg()) .addReg(ExecReg) .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg()); + MachineOperand &AndSCC = And->getOperand(3); + assert(AndSCC.getReg() == AMDGPU::SCC); + MachineOperand &Andn2SCC = Andn2->getOperand(3); + assert(Andn2SCC.getReg() == AMDGPU::SCC); + Andn2SCC.setIsDead(AndSCC.isDead()); And->eraseFromParent(); LIS->InsertMachineInstrInMaps(*Andn2); diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 9a1855c3458be..87bacc5880ac8 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -10,11 +10,11 @@ /// /// E.g. original: /// V_LSHRREV_B32_e32 %0, 16, %1 -/// V_ADD_I32_e32 %2, %0, %3 +/// V_ADD_CO_U32_e32 %2, %0, %3 /// V_LSHLREV_B32_e32 %4, 16, %2 /// /// Replace: -/// V_ADD_I32_sdwa %4, %1, %3 +/// V_ADD_CO_U32_sdwa %4, %1, %3 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD /// //===----------------------------------------------------------------------===// @@ -863,19 +863,19 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { } // Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and -// V_ADD_I32_e64 into V_ADD_I32_e32. This allows isConvertibleToSDWA -// to perform its transformation on V_ADD_I32_e32 into V_ADD_I32_sdwa. +// V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows isConvertibleToSDWA +// to perform its transformation on V_ADD_CO_U32_e32 into V_ADD_CO_U32_sdwa. // // We are transforming from a VOP3 into a VOP2 form of the instruction. 
// %19:vgpr_32 = V_AND_B32_e32 255, // killed %16:vgpr_32, implicit $exec -// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_I32_e64 +// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64 // %26.sub0:vreg_64, %19:vgpr_32, implicit $exec // %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64 // %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec // // becomes -// %47:vgpr_32 = V_ADD_I32_sdwa +// %47:vgpr_32 = V_ADD_CO_U32_sdwa // 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0, // implicit-def $vcc, implicit $exec // %48:vgpr_32 = V_ADDC_U32_e32 @@ -883,8 +883,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI, const GCNSubtarget &ST) const { int Opc = MI.getOpcode(); - assert((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64) && - "Currently only handles V_ADD_I32_e64 or V_SUB_I32_e64"); + assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) && + "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64"); // Can the candidate MI be shrunk? 
if (!TII->canShrink(MI, *MRI)) @@ -1235,8 +1235,8 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { const auto &Operand = OperandPair.second; MachineInstr *PotentialMI = Operand->potentialToConvert(TII); if (PotentialMI && - (PotentialMI->getOpcode() == AMDGPU::V_ADD_I32_e64 || - PotentialMI->getOpcode() == AMDGPU::V_SUB_I32_e64)) + (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 || + PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64)) pseudoOpConvertToVOP2(*PotentialMI, ST); } SDWAOperands.clear(); diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 1bb66907f9ce4..c9bec366a54a2 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -54,14 +54,14 @@ char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID; bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { // Match: - // sreg = -1 - // vcc = S_AND_B64 exec, sreg + // sreg = -1 or 0 + // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg // S_CBRANCH_VCC[N]Z // => // S_CBRANCH_EXEC[N]Z // We end up with this pattern sometimes after basic block placement. - // It happens while combining a block which assigns -1 to a saved mask and - // another block which consumes that saved mask and then a branch. + // It happens while combining a block which assigns -1 or 0 to a saved mask + // and another block which consumes that saved mask and then a branch. bool Changed = false; MachineBasicBlock &MBB = *MI.getParent(); const GCNSubtarget &ST = MBB.getParent()->getSubtarget(); @@ -69,6 +69,8 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { const unsigned CondReg = TRI->getVCC(); const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; + const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64; + const unsigned Mov = IsWave32 ? 
AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(), E = MBB.rend(); @@ -80,7 +82,8 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { if (A->modifiesRegister(ExecReg, TRI)) return false; if (A->modifiesRegister(CondReg, TRI)) { - if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And) + if (!A->definesRegister(CondReg, TRI) || + (A->getOpcode() != And && A->getOpcode() != AndN2)) return false; break; } @@ -97,9 +100,10 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { } if (Op1.getReg() != ExecReg) return Changed; - if (Op2.isImm() && Op2.getImm() != -1) + if (Op2.isImm() && !(Op2.getImm() == -1 || Op2.getImm() == 0)) return Changed; + int64_t MaskValue = 0; Register SReg; if (Op2.isReg()) { SReg = Op2.getReg(); @@ -113,28 +117,86 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { ReadsSreg |= M->readsRegister(SReg, TRI); } if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() || - M->getOperand(1).getImm() != -1) + (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0)) return Changed; - // First if sreg is only used in and instruction fold the immediate - // into that and. + MaskValue = M->getOperand(1).getImm(); + // First if sreg is only used in the AND instruction fold the immediate + // into into the AND. 
if (!ReadsSreg && Op2.isKill()) { - A->getOperand(2).ChangeToImmediate(-1); + A->getOperand(2).ChangeToImmediate(MaskValue); M->eraseFromParent(); } + } else if (Op2.isImm()) { + MaskValue = Op2.getImm(); + } else { + llvm_unreachable("Op2 must be register or immediate"); } - if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) && - MI.killsRegister(CondReg, TRI)) + // Invert mask for s_andn2 + assert(MaskValue == 0 || MaskValue == -1); + if (A->getOpcode() == AndN2) + MaskValue = ~MaskValue; + + if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC)) { + if (!MI.killsRegister(CondReg, TRI)) { + // Replace AND with MOV + if (MaskValue == 0) { + BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg) + .addImm(0); + } else { + BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg) + .addReg(ExecReg); + } + } + // Remove AND instruction A->eraseFromParent(); + } bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ; if (SReg == ExecReg) { + // EXEC is updated directly if (IsVCCZ) { MI.eraseFromParent(); return true; } MI.setDesc(TII->get(AMDGPU::S_BRANCH)); - } else { + } else if (IsVCCZ && MaskValue == 0) { + // Will always branch + // Remove all succesors shadowed by new unconditional branch + MachineBasicBlock *Parent = MI.getParent(); + SmallVector ToRemove; + bool Found = false; + for (MachineInstr &Term : Parent->terminators()) { + if (Found) { + if (Term.isBranch()) + ToRemove.push_back(&Term); + } else { + Found = Term.isIdenticalTo(MI); + } + } + assert(Found && "conditional branch is not terminator"); + for (auto BranchMI : ToRemove) { + MachineOperand &Dst = BranchMI->getOperand(0); + assert(Dst.isMBB() && "destination is not basic block"); + Parent->removeSuccessor(Dst.getMBB()); + BranchMI->eraseFromParent(); + } + + if (MachineBasicBlock *Succ = Parent->getFallThrough()) { + Parent->removeSuccessor(Succ); + } + + // Rewrite to unconditional branch + MI.setDesc(TII->get(AMDGPU::S_BRANCH)); + } else if (!IsVCCZ && MaskValue == 
0) { + // Will never branch + MachineOperand &Dst = MI.getOperand(0); + assert(Dst.isMBB() && "destination is not basic block"); + MI.getParent()->removeSuccessor(Dst.getMBB()); + MI.eraseFromParent(); + return true; + } else if (MaskValue == -1) { + // Depends only on EXEC MI.setDesc( TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ)); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 5d6009ebf3843..956658296a94a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1356,7 +1356,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (!IsVOP2) MIB.addImm(0); // clamp bit } else { - assert(MIB->getOpcode() == AMDGPU::V_ADD_I32_e64 && + assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 && "Need to reuse carry out register"); // Use scavenged unused carry out as offset register. diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 53b7f7d3ca0a9..9c6833a7dab61 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -185,6 +185,11 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) { if (!MI.getOperand(0).isReg()) TII->commuteInstruction(MI, false, 0, 1); + // cmpk requires src0 to be a register + const MachineOperand &Src0 = MI.getOperand(0); + if (!Src0.isReg()) + return; + const MachineOperand &Src1 = MI.getOperand(1); if (!Src1.isImm()) return; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 9d7b25d552170..d60fa58a0a74e 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -288,13 +288,11 @@ def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">; let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in { def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">; -def 
S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">; } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9 let Defs = [SCC] in { def S_ABS_I32 : SOP1_32 <"s_abs_i32">; } // End Defs = [SCC] -def S_MOV_FED_B32 : SOP1_32 <"s_mov_fed_b32">; let SubtargetPredicate = HasVGPRIndexMode in { def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> { @@ -1381,7 +1379,6 @@ multiclass SOP1_Real_gfx6_gfx7_gfx10 op> : SOP1_Real_gfx6_gfx7, SOP1_Real_gfx10; defm S_CBRANCH_JOIN : SOP1_Real_gfx6_gfx7<0x032>; -defm S_MOV_REGRD_B32 : SOP1_Real_gfx6_gfx7<0x033>; defm S_MOV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x003>; defm S_MOV_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x004>; @@ -1430,7 +1427,6 @@ defm S_MOVRELS_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02f>; defm S_MOVRELD_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x030>; defm S_MOVRELD_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x031>; defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>; -defm S_MOV_FED_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x035>; //===----------------------------------------------------------------------===// // SOP2 - GFX10. 
@@ -1643,9 +1639,7 @@ def S_MOVRELS_B64_vi : SOP1_Real_vi <0x2b, S_MOVRELS_B64>; def S_MOVRELD_B32_vi : SOP1_Real_vi <0x2c, S_MOVRELD_B32>; def S_MOVRELD_B64_vi : SOP1_Real_vi <0x2d, S_MOVRELD_B64>; def S_CBRANCH_JOIN_vi : SOP1_Real_vi <0x2e, S_CBRANCH_JOIN>; -def S_MOV_REGRD_B32_vi : SOP1_Real_vi <0x2f, S_MOV_REGRD_B32>; def S_ABS_I32_vi : SOP1_Real_vi <0x30, S_ABS_I32>; -def S_MOV_FED_B32_vi : SOP1_Real_vi <0x31, S_MOV_FED_B32>; def S_SET_GPR_IDX_IDX_vi : SOP1_Real_vi <0x32, S_SET_GPR_IDX_IDX>; def S_ADD_U32_vi : SOP2_Real_vi <0x00, S_ADD_U32>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 00e6d517bde58..b89e34e4c99c1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -783,6 +783,23 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) } // namespace Hwreg +//===----------------------------------------------------------------------===// +// MTBUF Format +//===----------------------------------------------------------------------===// + +namespace MTBUFFormat { + +int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { + return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); +} + +void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { + Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; + Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; +} + +} // namespace MTBUFFormat + //===----------------------------------------------------------------------===// // SendMsg //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index e71554575f6af..a9ea05755a676 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -482,6 +482,15 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width); } // 
namespace Hwreg +namespace MTBUFFormat { + +LLVM_READNONE +int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt); + +void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt); + +} // namespace MTBUFFormat + namespace SendMsg { LLVM_READONLY diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 17f334f62a30b..11ec2953e1e84 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -338,8 +338,6 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>; defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>; } // End Uses = [M0, EXEC] -defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; - let SubtargetPredicate = isGFX6GFX7 in { let SchedRW = [WriteTrans32] in { defm V_LOG_CLAMP_F32 : @@ -650,7 +648,6 @@ defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>; defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>; defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>; defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>; -defm V_MOV_FED_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x009>; defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; @@ -754,7 +751,6 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>; defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>; defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>; defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>; -defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>; defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>; defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>; defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index aa37dbf1418f9..55b64c4e614c4 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -240,12 +240,16 @@ multiclass VOP2eInst : +class VOP2eInstAlias : InstAlias , - PredicateControl { -} + 
ps.Pfl.Src1RC32:$src1)>, PredicateControl; + +class VOP2e64InstAlias : + InstAlias , + PredicateControl; multiclass VOP2eInstAliases { let WaveSizePredicate = isWave32 in { @@ -502,12 +506,9 @@ def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. - -// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI, -// but the VI instructions behave the same as the SI versions. -defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>; -defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>; -defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>; +defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>; +defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; +defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>; defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; @@ -595,8 +596,8 @@ let SubtargetPredicate = HasAddNoCarryInsts in { } let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in { -def : DivergentClampingBinOp; -def : DivergentClampingBinOp; +def : DivergentClampingBinOp; +def : DivergentClampingBinOp; } def : DivergentBinOp; @@ -1090,13 +1091,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } //===---------------------------- VOP3beOnly ----------------------------===// - multiclass VOP3beOnly_Real_gfx10 op, string opName, string asmName> { + multiclass 
VOP3beOnly_Real_gfx10 op> { def _e64_gfx10 : - VOP3_Real(opName#"_e64"), SIEncodingFamily.GFX10>, - VOP3be_gfx10(opName#"_e64").Pfl> { - VOP3_Pseudo Ps = !cast(opName#"_e64"); - let AsmString = asmName # Ps.AsmOperands; - } + VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, + VOP3be_gfx10(NAME#"_e64").Pfl>; } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" @@ -1172,13 +1170,10 @@ defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>; defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>; defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>; -// VOP3 carry-in, carry-out. -defm V_ADD_CO_U32 : - VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">; -defm V_SUB_CO_U32 : - VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">; -defm V_SUBREV_CO_U32 : - VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">; +// VOP3 carry-out. +defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>; +defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>; +defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>; let SubtargetPredicate = isGFX10Plus in { defm : VOP2eInstAliases; @@ -1217,20 +1212,20 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { VOP2_Real(NAME), SIEncodingFamily.SI>, VOP2_MADKe(NAME).Pfl>; } - multiclass VOP2_Real_e32_gfx6_gfx7 op> { + multiclass VOP2_Real_e32_gfx6_gfx7 op, string PseudoName = NAME> { def _e32_gfx6_gfx7 : - VOP2_Real(NAME#"_e32"), SIEncodingFamily.SI>, - VOP2e(NAME#"_e32").Pfl>; + VOP2_Real(PseudoName#"_e32"), SIEncodingFamily.SI>, + VOP2e(PseudoName#"_e32").Pfl>; } - multiclass VOP2_Real_e64_gfx6_gfx7 op> { + multiclass VOP2_Real_e64_gfx6_gfx7 op, string PseudoName = NAME> { def _e64_gfx6_gfx7 : - VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; + VOP3_Real(PseudoName#"_e64"), SIEncodingFamily.SI>, + VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(PseudoName#"_e64").Pfl>; } - multiclass VOP2be_Real_e64_gfx6_gfx7 op> { + multiclass 
VOP2be_Real_e64_gfx6_gfx7 op, string PseudoName = NAME> { def _e64_gfx6_gfx7 : - VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; + VOP3_Real(PseudoName#"_e64"), SIEncodingFamily.SI>, + VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(PseudoName#"_e64").Pfl>; } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" @@ -1246,6 +1241,20 @@ multiclass VOP2_Real_gfx6_gfx7_gfx10 op> : multiclass VOP2be_Real_gfx6_gfx7 op> : VOP2_Real_e32_gfx6_gfx7, VOP2be_Real_e64_gfx6_gfx7; +multiclass VOP2be_Real_gfx6_gfx7_with_name op, + string PseudoName, string asmName> { + defvar ps32 = !cast(PseudoName#"_e32"); + defvar ps64 = !cast(PseudoName#"_e64"); + + let AsmString = asmName # ps32.AsmOperands in { + defm "" : VOP2_Real_e32_gfx6_gfx7; + } + + let AsmString = asmName # ps64.AsmOperands in { + defm "" : VOP2be_Real_e64_gfx6_gfx7; + } +} + defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>; defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>; defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>; @@ -1262,9 +1271,12 @@ defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>; defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>; defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>; defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>; -defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7<0x025>; -defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7<0x026>; -defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7<0x027>; + +// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in +// VI, but the VI instructions behave the same as the SI versions. 
+defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">; +defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">; +defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">; defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>; defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>; defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>; @@ -1277,6 +1289,13 @@ let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) let SubtargetPredicate = isGFX6GFX7 in { defm : VOP2eInstAliases; + defm : VOP2eInstAliases; + defm : VOP2eInstAliases; + defm : VOP2eInstAliases; + + def : VOP2e64InstAlias; + def : VOP2e64InstAlias; + def : VOP2e64InstAlias; } // End SubtargetPredicate = isGFX6GFX7 defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>; @@ -1490,16 +1509,16 @@ defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; -defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">; -defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">; -defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">; +defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">; +defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">; +defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">; defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; -defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">; -defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">; -defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", 
"v_subrev_co_u32">; +defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; +defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; +defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; @@ -1568,11 +1587,11 @@ defm : VOP2eInstAliases; let SubtargetPredicate = isGFX9Only in { -defm : VOP2bInstAliases; +defm : VOP2bInstAliases; defm : VOP2bInstAliases; -defm : VOP2bInstAliases; +defm : VOP2bInstAliases; defm : VOP2bInstAliases; -defm : VOP2bInstAliases; +defm : VOP2bInstAliases; defm : VOP2bInstAliases; } // End SubtargetPredicate = isGFX9Only diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 169949f2171ae..dcbfeb547a32d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -649,8 +649,8 @@ def V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile>; def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile>; -def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile>; -def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile>; +def V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile>; +def V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile>; class ThreeOp_i32_Pats : GCNPat < @@ -868,9 +868,9 @@ defm V_ADD_NC_I16 : defm V_SUB_NC_I16 : VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">; defm V_SUB_NC_I32 : - VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32_gfx9", "v_sub_nc_i32">; + VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32", "v_sub_nc_i32">; defm V_ADD_NC_I32 : - VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">; + VOP3_Real_gfx10_with_name<0x37f, 
"V_ADD_I32", "v_add_nc_i32">; defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_gfx10<0x200>; defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_gfx10<0x201>; @@ -1177,8 +1177,8 @@ defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">; defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">; defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">; -defm V_ADD_I32_gfx9 : VOP3_Real_gfx9 <0x29c, "v_add_i32">; -defm V_SUB_I32_gfx9 : VOP3_Real_gfx9 <0x29d, "v_sub_i32">; +defm V_ADD_I32 : VOP3_Real_vi <0x29c>; +defm V_SUB_I32 : VOP3_Real_vi <0x29d>; defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>; defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>; diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index d860473011e77..4f1410eecff4f 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -435,7 +435,7 @@ bool ARMCallLowering::lowerFormalArguments( for (auto &Arg : F.args()) { if (!isSupportedType(DL, TLI, Arg.getType())) return false; - if (Arg.hasPassPointeeByValueAttr()) + if (Arg.hasPassPointeeByValueCopyAttr()) return false; } diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 48622aae3cb41..ec72c2b5ac194 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1860,6 +1860,66 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, default: return false; + case ARM::VBSPd: + case ARM::VBSPq: { + Register DstReg = MI.getOperand(0).getReg(); + if (DstReg == MI.getOperand(3).getReg()) { + // Expand to VBIT + unsigned NewOpc = Opcode == ARM::VBSPd ? 
ARM::VBITd : ARM::VBITq; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(3)) + .add(MI.getOperand(2)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } else if (DstReg == MI.getOperand(2).getReg()) { + // Expand to VBIF + unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } else { + // Expand to VBSL + unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; + if (DstReg == MI.getOperand(1).getReg()) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } else { + // Use move to satisfy constraints + unsigned MoveOpc = Opcode == ARM::VBSPd ? 
ARM::VORRd : ARM::VORRq; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) + .addReg(DstReg, + RegState::Define | + getRenamableRegState(MI.getOperand(0).isRenamable())) + .add(MI.getOperand(1)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .addReg(DstReg, + RegState::Kill | + getRenamableRegState(MI.getOperand(0).isRenamable())) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } + } + MI.eraseFromParent(); + return true; + } + case ARM::TCRETURNdi: case ARM::TCRETURNri: { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 287e2e60e572c..7c2798b0a4ba2 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1755,7 +1755,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; - case ARMISD::VBSL: return "ARMISD::VBSL"; + case ARMISD::VBSP: return "ARMISD::VBSP"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; @@ -13153,7 +13153,7 @@ static SDValue PerformORCombine(SDNode *N, // Canonicalize the vector type to make instruction selection // simpler. EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT, N0->getOperand(1), N0->getOperand(0), N1->getOperand(0)); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 8b1f4183032eb..f4d77d4ff70f5 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -271,8 +271,8 @@ class VectorType; // Vector AND with NOT of immediate VBICIMM, - // Vector bitwise select - VBSL, + // Pseudo vector bitwise select + VBSP, // Pseudo-instruction representing a memory copy using ldm/stm // instructions. diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 2a1f50d97e3b3..d6e725084d7f9 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5605,7 +5605,6 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; -let hasSideEffects = 1 in class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 1b3f6075c0e9d..c097a4ad4facf 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; -def NEONvbsl : SDNode<"ARMISD::VBSL", +def NEONvbsp : SDNode<"ARMISD::VBSP", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -4526,9 +4526,9 @@ let Predicates = [HasNEON, HasV8_1a] in { (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh 
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), - (v4i32 (ARMvduplane (v4i32 QPR:$src3), + (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane)))))), (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), @@ -4579,17 +4579,17 @@ let Predicates = [HasNEON, HasV8_1a] in { (v2i32 DPR:$Vn), (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), - (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), - (v8i16 (ARMvduplane (v8i16 QPR:$src3), + (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane)))))), (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG + (v4i16 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; @@ -4601,7 +4601,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG + (v2i32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; @@ -5442,74 +5442,86 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; } -// VBSL : Vector Bitwise Select -def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - N3RegFrm, IIC_VCNTiD, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set DPR:$Vd, - (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +// The TwoAddress pass will not go looking for equivalent operations +// with different register constraints; it just inserts copies. +// That is why pseudo VBSP implemented. Is is expanded later into +// VBIT/VBIF/VBSL taking into account register constraints to avoid copies. 
+def VBSPd + : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + IIC_VBINiD, "", + [(set DPR:$Vd, + (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; let Predicates = [HasNEON] in { def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), - (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; } -def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - N3RegFrm, IIC_VCNTiQ, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set QPR:$Vd, - (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; - +def VBSPq + : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + IIC_VBINiQ, "", + [(set QPR:$Vd, + (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; let Predicates = [HasNEON] in { def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v8i16 
(int_arm_neon_vbsl (v8i16 QPR:$src1), (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), - (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; } +// VBSL : Vector Bitwise Select +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", -// FIXME: This instruction's encoding MAY NOT BE correct. def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, @@ -5523,7 +5535,6 @@ def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", -// FIXME: This instruction's encoding MAY NOT BE correct. 
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, @@ -5535,10 +5546,6 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", []>; -// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking -// for equivalent operations with different register constraints; it just -// inserts copies. - // Vector Absolute Differences. // VABD : Vector Absolute Difference @@ -7953,7 +7960,7 @@ let Predicates = [HasNEON,IsLE] in { (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; } -// The following patterns are basically a copy of the patterns above, +// The following patterns are basically a copy of the patterns above, // however with an additional VREV16d instruction to convert data // loaded by VLD1LN into proper vector format in big endian mode. let Predicates = [HasNEON,IsBE] in { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 7137e8ee66b8f..d5143adaac179 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5446,6 +5446,7 @@ class CS opcode, list pattern=[]> let Inst{3-0} = Rm{3-0}; let Uses = [CPSR]; + let hasSideEffects = 0; } def t2CSEL : CS<"csel", 0b1000>; diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td index d9a8d304c41fd..a52a2db3a0d63 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA57.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -1201,7 +1201,7 @@ def : InstRW<[A57Write_5cyc_1V], (instregex // --- 3.16 ASIMD Miscellaneous Instructions --- // ASIMD bitwise insert -def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>; // ASIMD count def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>; diff --git a/llvm/lib/Target/ARM/ARMScheduleR52.td b/llvm/lib/Target/ARM/ARMScheduleR52.td index 
d1cbf754b5a1b..466acec6f76ae 100644 --- a/llvm/lib/Target/ARM/ARMScheduleR52.td +++ b/llvm/lib/Target/ARM/ARMScheduleR52.td @@ -787,8 +787,8 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>; def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>; -def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>; -def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>; def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>; diff --git a/llvm/lib/Target/ARM/ARMScheduleSwift.td b/llvm/lib/Target/ARM/ARMScheduleSwift.td index e0e98bfa0e9b2..d66b3065c7b74 100644 --- a/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -558,8 +558,8 @@ let SchedModel = SwiftModel in { (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL", "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi", "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST", - "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", - "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>; + "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT", + "VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>; def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VEXT", "VREV16", "VREV32", "VREV64")>; diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 9ead5fa4308c3..b316b1041f2c5 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -409,7 +409,7 @@ void 
ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - 1, false, false, true, true, [this](const Function &F) { + SimplifyCFGOptions().sinkCommonInsts(true), [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 74b1331216a05..bea4e157a1316 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include #include #include @@ -45,7 +46,7 @@ static cl::opt DisableLowOverheadLoops( "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); -extern cl::opt DisableTailPredication; +extern cl::opt EnableTailPredication; extern cl::opt EnableMaskedGatherScatters; @@ -1405,12 +1406,47 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) { static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, const DataLayout &DL, const LoopAccessInfo *LAI) { + LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n"); + + // If there are live-out values, it is probably a reduction, which needs a + // final reduction step after the loop. MVE has a VADDV instruction to reduce + // integer vectors, but doesn't have an equivalent one for float vectors. A + // live-out value that is not recognised as a reduction will result in the + // tail-predicated loop to be reverted to a non-predicated loop and this is + // very expensive, i.e. it has a significant performance impact. 
So, in this + // case it's better not to tail-predicate the loop, which is what we check + // here. Thus, we allow only 1 live-out value, which has to be an integer + // reduction, which matches the loops supported by ARMLowOverheadLoops. + // It is important to keep ARMLowOverheadLoops and canTailPredicateLoop in + // sync with each other. + SmallVector< Instruction *, 8 > LiveOuts; + LiveOuts = llvm::findDefsUsedOutsideOfLoop(L); + bool IntReductionsDisabled = + EnableTailPredication == TailPredication::EnabledNoReductions || + EnableTailPredication == TailPredication::ForceEnabledNoReductions; + + for (auto *I : LiveOuts) { + if (!I->getType()->isIntegerTy()) { + LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer " + "live-out value\n"); + return false; + } + if (I->getOpcode() != Instruction::Add) { + LLVM_DEBUG(dbgs() << "Only add reductions supported\n"); + return false; + } + if (IntReductionsDisabled) { + LLVM_DEBUG(dbgs() << "Integer add reductions not enabled\n"); + return false; + } + } + + // Next, check that all instructions can be tail-predicated. 
PredicatedScalarEvolution PSE = LAI->getPSE(); + SmallVector LoadStores; int ICmpCount = 0; int Stride = 0; - LLVM_DEBUG(dbgs() << "tail-predication: checking allowed instructions\n"); - SmallVector LoadStores; for (BasicBlock *BB : L->blocks()) { for (Instruction &I : BB->instructionsWithoutDebug()) { if (isa(&I)) @@ -1458,8 +1494,10 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) { - if (DisableTailPredication) + if (!EnableTailPredication) { + LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n"); return false; + } // Creating a predicated vector loop is the first step for generating a // tail-predicated hardware loop, for which we need the MVE masked @@ -1501,7 +1539,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, } bool ARMTTIImpl::emitGetActiveLaneMask() const { - if (!ST->hasMVEIntegerOps() || DisableTailPredication) + if (!ST->hasMVEIntegerOps() || !EnableTailPredication) return false; // Intrinsic @llvm.get.active.lane.mask is supported. 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 537a546361eeb..7bf6de4bffe07 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -38,6 +38,16 @@ class ScalarEvolution; class Type; class Value; +namespace TailPredication { + enum Mode { + Disabled = 0, + EnabledNoReductions, + Enabled, + ForceEnabledNoReductions, + ForceEnabled + }; +} + class ARMTTIImpl : public BasicTTIImplBase { using BaseT = BasicTTIImplBase; using TTI = TargetTransformInfo; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 9ad595c016c4c..b02aef3c338b8 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -1010,6 +1010,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_condbranch: case ARM::fixup_t2_uncondbranch: case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_pcrel_9: case ARM::fixup_t2_adr_pcrel_12: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp index 6583dcb77e1ed..5bf3522ab2e64 100644 --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -42,6 +42,7 @@ #include "ARM.h" #include "ARMSubtarget.h" +#include "ARMTargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -64,16 +65,27 @@ using namespace llvm; #define DEBUG_TYPE "mve-tail-predication" #define DESC "Transform predicated vector loops to use MVE tail predication" -static cl::opt -ForceTailPredication("force-mve-tail-predication", cl::Hidden, cl::init(false), - cl::desc("Force MVE tail-predication even if it might be " - "unsafe (e.g. 
possible overflow in loop " - "counters)")); +cl::opt EnableTailPredication( + "tail-predication", cl::desc("MVE tail-predication options"), + cl::init(TailPredication::Disabled), + cl::values(clEnumValN(TailPredication::Disabled, "disabled", + "Don't tail-predicate loops"), + clEnumValN(TailPredication::EnabledNoReductions, + "enabled-no-reductions", + "Enable tail-predication, but not for reduction loops"), + clEnumValN(TailPredication::Enabled, + "enabled", + "Enable tail-predication, including reduction loops"), + clEnumValN(TailPredication::ForceEnabledNoReductions, + "force-enabled-no-reductions", + "Enable tail-predication, but not for reduction loops, " + "and force this which might be unsafe"), + clEnumValN(TailPredication::ForceEnabled, + "force-enabled", + "Enable tail-predication, including reduction loops, " + "and force this which might be unsafe"))); + -cl::opt -DisableTailPredication("disable-mve-tail-predication", cl::Hidden, - cl::init(true), - cl::desc("Disable MVE Tail Predication")); namespace { class MVETailPredication : public LoopPass { @@ -146,7 +158,7 @@ static bool IsMasked(Instruction *I) { } bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { - if (skipLoop(L) || DisableTailPredication) + if (skipLoop(L) || !EnableTailPredication) return false; MaskedInsts.clear(); @@ -346,6 +358,9 @@ static void Cleanup(SetVector &MaybeDead, Loop *L) { // vector width. bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount, FixedVectorType *VecTy) { + bool ForceTailPredication = + EnableTailPredication == TailPredication::ForceEnabledNoReductions || + EnableTailPredication == TailPredication::ForceEnabled; // 1) Test whether entry to the loop is protected by a conditional // BTC + 1 < 0. 
In other words, if the scalar trip count overflows, // becomes negative, we shouldn't enter the loop and creating diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 48c6b47f21545..43942316b80e6 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -12,6 +12,7 @@ #include "Thumb2InstrInfo.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -38,6 +39,11 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::desc("Use old-style Thumb2 if-conversion heuristics"), cl::init(false)); +static cl::opt +PreferNoCSEL("prefer-no-csel", cl::Hidden, + cl::desc("Prefer predicated Move to CSEL"), + cl::init(false)); + Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {} @@ -118,6 +124,31 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL; } +MachineInstr * +Thumb2InstrInfo::optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl &SeenMIs, + bool PreferFalse) const { + // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the + // MOVCC into another instruction. If that fails on 8.1-M fall back to using a + // CSEL. 
+ MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse); + if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) { + Register DestReg = MI.getOperand(0).getReg(); + + if (!DestReg.isVirtual()) + return nullptr; + + MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + get(ARM::t2CSEL), DestReg) + .add(MI.getOperand(2)) + .add(MI.getOperand(1)) + .add(MI.getOperand(3)); + SeenMIs.insert(NewMI); + return NewMI; + } + return RV; +} + void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index ec37636322398..e31c49a38959f 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -60,6 +60,10 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { /// const ThumbRegisterInfo &getRegisterInfo() const override { return RI; } + MachineInstr *optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl &SeenMIs, + bool) const override; + private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index a2bc1d050fbba..c95a553b86acf 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -405,7 +405,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass { static char ID; AVRFrameAnalyzer() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { const MachineFrameInfo &MFI = MF.getFrameInfo(); AVRMachineFunctionInfo *FuncInfo = MF.getInfo(); @@ -457,7 +457,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass { return false; } - StringRef getPassName() const { return "AVR Frame Analyzer"; } + StringRef getPassName() const override { return "AVR Frame Analyzer"; } }; char 
AVRFrameAnalyzer::ID = 0; @@ -473,7 +473,7 @@ struct AVRDynAllocaSR : public MachineFunctionPass { static char ID; AVRDynAllocaSR() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { // Early exit when there are no variable sized objects in the function. if (!MF.getFrameInfo().hasVarSizedObjects()) { return false; @@ -506,7 +506,7 @@ struct AVRDynAllocaSR : public MachineFunctionPass { return true; } - StringRef getPassName() const { + StringRef getPassName() const override { return "AVR dynalloca stack pointer save/restore"; } }; diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index a7b6d03b3b3d5..230bc7adc07ab 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -178,10 +178,10 @@ class AVROperand : public MCParsedAsmOperand { return isUInt<8>(Value); } - bool isReg() const { return Kind == k_Register; } - bool isImm() const { return Kind == k_Immediate; } - bool isToken() const { return Kind == k_Token; } - bool isMem() const { return Kind == k_Memri; } + bool isReg() const override { return Kind == k_Register; } + bool isImm() const override { return Kind == k_Immediate; } + bool isToken() const override { return Kind == k_Token; } + bool isMem() const override { return Kind == k_Memri; } bool isMemri() const { return Kind == k_Memri; } StringRef getToken() const { @@ -189,7 +189,7 @@ class AVROperand : public MCParsedAsmOperand { return Tok; } - unsigned getReg() const { + unsigned getReg() const override { assert((Kind == k_Register || Kind == k_Memri) && "Invalid access!"); return RegImm.Reg; @@ -239,10 +239,10 @@ class AVROperand : public MCParsedAsmOperand { RegImm = {RegNo, Imm}; } - SMLoc getStartLoc() const { return Start; } - SMLoc getEndLoc() const { return End; } + SMLoc getStartLoc() const override { return Start; } + SMLoc getEndLoc() 
const override { return End; } - virtual void print(raw_ostream &O) const { + void print(raw_ostream &O) const override { switch (Kind) { case k_Token: O << "Token: \"" << getToken() << "\""; diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp index 815a309a8caef..42fac5e2e000e 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp @@ -131,7 +131,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << getPrettyRegisterName(Op.getReg(), MRI); } } else if (Op.isImm()) { - O << Op.getImm(); + O << formatImm(Op.getImm()); } else { assert(Op.isExpr() && "Unknown operand kind in printOperand"); O << *Op.getExpr(); diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 6ada75adba969..13999d800a800 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -664,7 +665,17 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) { return; } - // MapDef type is a struct type + // MapDef type may be a struct type or a non-pointer derived type + const DIType *OrigTy = Ty; + while (auto *DTy = dyn_cast(Ty)) { + auto Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + break; + Ty = DTy->getBaseType(); + } + const auto *CTy = dyn_cast(Ty); if (!CTy) return; @@ -673,27 +684,15 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) { if (Tag != dwarf::DW_TAG_structure_type || CTy->isForwardDecl()) return; - // Record this type + // Visit all struct members to ensure pointee type is visited const DINodeArray Elements = 
CTy->getElements(); - bool HasBitField = false; - for (const auto *Element : Elements) { - auto E = cast(Element); - if (E->isBitField()) { - HasBitField = true; - break; - } - } - - auto TypeEntry = - std::make_unique(CTy, true, HasBitField, Elements.size()); - StructTypes.push_back(TypeEntry.get()); - TypeId = addType(std::move(TypeEntry), CTy); - - // Visit all struct members for (const auto *Element : Elements) { const auto *MemberType = cast(Element); visitTypeEntry(MemberType->getBaseType()); } + + // Visit this type, struct or a const/typedef/volatile/restrict type + visitTypeEntry(OrigTy, TypeId, false, false); } /// Read file contents from the actual file or from the source @@ -1127,6 +1126,20 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { if (ProcessingMapDef != SecName.startswith(".maps")) continue; + // Create a .rodata datasec if the global variable is an initialized + // constant with private linkage and if it won't be in .rodata.str<#> + // and .rodata.cst<#> sections. 
+ if (SecName == ".rodata" && Global.hasPrivateLinkage() && + DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) { + SectionKind GVKind = + TargetLoweringObjectFile::getKindForGlobal(&Global, Asm->TM); + // skip .rodata.str<#> and .rodata.cst<#> sections + if (!GVKind.isMergeableCString() && !GVKind.isMergeableConst()) { + DataSecEntries[std::string(SecName)] = + std::make_unique(Asm, std::string(SecName)); + } + } + SmallVector GVs; Global.getDebugInfo(GVs); diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 8ccd53eb3106d..2f39f665299a5 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -63,8 +63,8 @@ class BTFTypeDerived : public BTFTypeBase { public: BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup); - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; void setPointeeType(uint32_t PointeeType); }; @@ -74,8 +74,8 @@ class BTFTypeFwd : public BTFTypeBase { public: BTFTypeFwd(StringRef Name, bool IsUnion); - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle int type. @@ -86,9 +86,9 @@ class BTFTypeInt : public BTFTypeBase { public: BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits, uint32_t OffsetInBits, StringRef TypeName); - uint32_t getSize() { return BTFTypeBase::getSize() + sizeof(uint32_t); } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + uint32_t getSize() override { return BTFTypeBase::getSize() + sizeof(uint32_t); } + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle enumerate type. 
@@ -98,11 +98,11 @@ class BTFTypeEnum : public BTFTypeBase { public: BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues); - uint32_t getSize() { + uint32_t getSize() override { return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize; } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle array type. @@ -111,9 +111,9 @@ class BTFTypeArray : public BTFTypeBase { public: BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems); - uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + uint32_t getSize() override { return BTFTypeBase::getSize() + BTF::BTFArraySize; } + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle struct/union type. @@ -125,11 +125,11 @@ class BTFTypeStruct : public BTFTypeBase { public: BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField, uint32_t NumMembers); - uint32_t getSize() { + uint32_t getSize() override { return BTFTypeBase::getSize() + Members.size() * BTF::BTFMemberSize; } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; std::string getName(); }; @@ -142,11 +142,11 @@ class BTFTypeFuncProto : public BTFTypeBase { public: BTFTypeFuncProto(const DISubroutineType *STy, uint32_t NumParams, const std::unordered_map &FuncArgNames); - uint32_t getSize() { + uint32_t getSize() override { return BTFTypeBase::getSize() + Parameters.size() * BTF::BTFParamSize; } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle subprogram @@ -155,9 +155,9 @@ class BTFTypeFunc : public BTFTypeBase { public: BTFTypeFunc(StringRef 
FuncName, uint32_t ProtoTypeId, uint32_t Scope); - uint32_t getSize() { return BTFTypeBase::getSize(); } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + uint32_t getSize() override { return BTFTypeBase::getSize(); } + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle variable instances @@ -167,9 +167,9 @@ class BTFKindVar : public BTFTypeBase { public: BTFKindVar(StringRef VarName, uint32_t TypeId, uint32_t VarInfo); - uint32_t getSize() { return BTFTypeBase::getSize() + 4; } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + uint32_t getSize() override { return BTFTypeBase::getSize() + 4; } + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// Handle data sections @@ -180,15 +180,15 @@ class BTFKindDataSec : public BTFTypeBase { public: BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName); - uint32_t getSize() { + uint32_t getSize() override { return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size(); } void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { Vars.push_back(std::make_tuple(Id, Sym, Size)); } std::string getName() { return Name; } - void completeType(BTFDebug &BDebug); - void emitType(MCStreamer &OS); + void completeType(BTFDebug &BDebug) override; + void emitType(MCStreamer &OS) override; }; /// String table. diff --git a/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp index 342ca21525c5e..d9307190ae169 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -221,15 +221,16 @@ bool HexagonGenExtract::convert(Instruction *In) { } bool HexagonGenExtract::visitBlock(BasicBlock *B) { + bool Changed = false; + // Depth-first, bottom-up traversal. 
for (auto *DTN : children(DT->getNode(B))) - visitBlock(DTN->getBlock()); + Changed |= visitBlock(DTN->getBlock()); // Allow limiting the number of generated extracts for debugging purposes. bool HasCutoff = ExtractCutoff.getPosition(); unsigned Cutoff = ExtractCutoff; - bool Changed = false; BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); while (true) { if (HasCutoff && (ExtractCount >= Cutoff)) diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 3fe42ea13f51b..49d98622d946c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -320,7 +320,11 @@ void HexagonPassConfig::addIRPasses() { if (!NoOpt) { if (EnableInitialCFGCleanup) - addPass(createCFGSimplificationPass(1, true, true, false, true)); + addPass(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .sinkCommonInsts(true))); if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); if (EnableCommGEP) diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index cbff8d1773ff0..9529b5e802d58 100644 --- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -156,12 +156,12 @@ class MSP430Operand : public MCParsedAsmOperand { addExprOperand(Inst, Mem.Offset); } - bool isReg() const { return Kind == k_Reg; } - bool isImm() const { return Kind == k_Imm; } - bool isToken() const { return Kind == k_Tok; } - bool isMem() const { return Kind == k_Mem; } - bool isIndReg() const { return Kind == k_IndReg; } - bool isPostIndReg() const { return Kind == k_PostIndReg; } + bool isReg() const override { return Kind == k_Reg; } + bool isImm() const override { return Kind == k_Imm; } + bool isToken() const override { return Kind == k_Tok; } + bool 
isMem() const override { return Kind == k_Mem; } + bool isIndReg() const { return Kind == k_IndReg; } + bool isPostIndReg() const { return Kind == k_PostIndReg; } bool isCGImm() const { if (Kind != k_Imm) @@ -182,7 +182,7 @@ class MSP430Operand : public MCParsedAsmOperand { return Tok; } - unsigned getReg() const { + unsigned getReg() const override { assert(Kind == k_Reg && "Invalid access!"); return Reg; } @@ -222,10 +222,10 @@ class MSP430Operand : public MCParsedAsmOperand { return std::make_unique(k_PostIndReg, RegNum, S, E); } - SMLoc getStartLoc() const { return Start; } - SMLoc getEndLoc() const { return End; } + SMLoc getStartLoc() const override { return Start; } + SMLoc getEndLoc() const override { return End; } - virtual void print(raw_ostream &O) const { + void print(raw_ostream &O) const override { switch (Kind) { case k_Tok: O << "Token " << Tok; diff --git a/llvm/lib/Target/Mips/Mips.td b/llvm/lib/Target/Mips/Mips.td index 7fe750249c588..792960332bcc7 100644 --- a/llvm/lib/Target/Mips/Mips.td +++ b/llvm/lib/Target/Mips/Mips.td @@ -191,7 +191,7 @@ def FeatureUseTCCInDIV : SubtargetFeature< "UseTCCInDIV", "false", "Force the assembler to use trapping">; -def FeatureMadd4 +def FeatureNoMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", "Disable 4-operand madd.fmt and related instructions">; diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index a3b928870f3f6..089fed9ec0bf4 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -242,7 +242,7 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">, def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<(all_of FeatureMSA)>; def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, - AssemblerPredicate<(all_of (not FeatureMadd4))>; + AssemblerPredicate<(all_of (not FeatureNoMadd4))>; def HasMT : Predicate<"Subtarget->hasMT()">, AssemblerPredicate<(all_of FeatureMT)>; def UseIndirectJumpsHazard : 
Predicate<"Subtarget->useIndirectJumpsHazard()">, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 8c52bbbd8a56a..27c687686641f 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -28,7 +28,7 @@ class PPCELFMCAsmInfo : public MCAsmInfoELF { }; class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF { - virtual void anchor(); + void anchor() override; public: explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &); diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp index c9f74bbf861ca..08b7bdb3ac1e2 100644 --- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -77,8 +77,9 @@ namespace { if (J->getOperand(0).getMBB() == &ReturnMBB) { // This is an unconditional branch to the return. Replace the // branch with a blr. - BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())) - .copyImplicitOps(*I); + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -89,10 +90,13 @@ namespace { if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + MI->setDesc(TII->get(PPC::BCCLR)); + MachineInstrBuilder(*ReturnMBB.getParent(), MI) .add(J->getOperand(0)) - .add(J->getOperand(1)) - .copyImplicitOps(*I); + .add(J->getOperand(1)); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -103,11 +107,13 @@ namespace { if (J->getOperand(1).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. 
- BuildMI( - **PI, J, J->getDebugLoc(), - TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)) - .add(J->getOperand(0)) - .copyImplicitOps(*I); + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + MI->setDesc( + TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)); + MachineInstrBuilder(*ReturnMBB.getParent(), MI) + .add(J->getOperand(0)); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 93ea3154e47f2..bd9174c1973dc 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1371,6 +1371,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + // AIX assembler does not support cfi directives. + const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) { int Opc = MI.getOpcode(); return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; @@ -1394,6 +1398,24 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, // Initialize current frame pointer. const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); + // Subroutines to generate .cfi_* directives. 
+ auto buildDefCFAReg = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register Reg) { + unsigned RegNum = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + }; + auto buildDefCFA = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register Reg, + int Offset) { + unsigned RegNum = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MBB.getParent()->addFrameInst( + MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + }; // Subroutine to determine if we can use the Imm as part of d-form. auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; }; // Subroutine to materialize the Imm into TempReg. @@ -1427,6 +1449,9 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, .addReg(SPReg) .addReg(NegSizeReg); }; + // Use FPReg to calculate CFA. + if (needsCFI) + buildDefCFA(PrologMBB, {MI}, FPReg, 0); // For case HasBP && MaxAlign > 1, we have to align the SP by performing // SP = SP - SP % MaxAlign. if (HasBP && MaxAlign > 1) { @@ -1462,6 +1487,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); for (int i = 0; i < NumBlocks; ++i) allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm); + if (needsCFI) { + // Restore using SPReg to calculate CFA. 
+ buildDefCFAReg(PrologMBB, {MI}, SPReg); + } } else { // Since CTR is a volatile register and current shrinkwrap implementation // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a @@ -1492,6 +1521,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, PrologMBB.end()); ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB); PrologMBB.addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } // Update liveins. recomputeLiveIns(*LoopMBB); recomputeLiveIns(*ExitMBB); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 49140bab51343..ddfbd04e1ebc5 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -423,6 +423,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasSPE()) { // SPE has built-in conversions + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); @@ -572,9 +575,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. - if (Subtarget.hasSPE()) + if (Subtarget.hasSPE()) { + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - else + } else setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } @@ -9892,6 +9896,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // to vector legalization will not be sent to the target combine. Try to // combine it here. 
if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) { + if (!isa(NewShuffle)) + return NewShuffle; Op = NewShuffle; SVOp = cast(Op); V1 = Op.getOperand(0); diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 2d12a72e29aee..742693083432c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -242,15 +242,6 @@ class VXForm_RD5_N3_VB5 xo, dag OOL, dag IOL, string asmstr, } -// VX-Form: [PO VRT / UIM RB XO]. -// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent -// "/ UIM" (unused bit followed by a 4-bit immediate) -// Destructive (insert) forms are suffixed with _ins. -class VXForm_VRT5_UIM5_RB5_ins xo, string opc, list pattern> - : VXForm_1, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - // VX-Form: [PO VRT RA VRB XO]. // Destructive (insert) forms are suffixed with _ins. class VXForm_VTB5_RA5_ins xo, string opc, list pattern> @@ -261,10 +252,27 @@ class VXForm_VTB5_RA5_ins xo, string opc, list pattern> // VX-Form: [PO VRT RA RB XO]. // Destructive (insert) forms are suffixed with _ins. 
class VXForm_VRT5_RAB5_ins xo, string opc, list pattern> - : VXForm_1, RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; +// VX-Form: [ PO BF // VRA VRB XO ] +class VXForm_BF3_VAB5 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<5> VA; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + // VN-Form: [PO VRT VRA VRB PS SD XO] // SD is "Shift Direction" class VNForm_VTAB5_SD3 xo, bits<2> ps, dag OOL, dag IOL, string asmstr, @@ -396,6 +404,25 @@ class 8RR_XX4Form_IMM3_XTABC6 opcode, bits<2> xo, dag OOL, dag IOL, let Inst{63} = XT{5}; } +// [PO BF / XO2 B XO BX /] +class XX2_BF3_XO5_XB6_XO9 opcode, bits<5> xo2, bits<9> xo, dag OOL, + dag IOL, string asmstr, InstrItinClass itin, + list pattern> + : I { + bits<3> BF; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = xo2; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + multiclass MLS_DForm_R_SI34_RTA5_MEM_p opcode, dag OOL, dag IOL, dag PCRel_IOL, string asmstr, InstrItinClass itin> { @@ -794,16 +821,18 @@ let Predicates = [IsISA3_1] in { (int_ppc_altivec_vsrdbi v16i8:$VRA, v16i8:$VRB, i32:$SH))]>; - def VINSW : - VXForm_VRT5_UIM5_RB5_ins<207, "vinsw", - [(set v4i32:$vD, - (int_ppc_altivec_vinsw v4i32:$vDi, i64:$rB, - timm:$UIM))]>; + def VINSW : + VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB), + "vinsw $vD, $rB, $UIM", IIC_VecGeneral, + [(set v4i32:$vD, + (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSD : - VXForm_VRT5_UIM5_RB5_ins<463, "vinsd", - [(set v2i64:$vD, - (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, - timm:$UIM))]>; + VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB), + "vinsd $vD, $rB, $UIM", 
IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSBVLX : VXForm_VTB5_RA5_ins<15, "vinsbvlx", [(set v16i8:$vD, @@ -837,44 +866,45 @@ let Predicates = [IsISA3_1] in { def VINSBLX : VXForm_VRT5_RAB5_ins<527, "vinsblx", [(set v16i8:$vD, - (int_ppc_altivec_vinsblx v16i8:$vDi, i64:$rA, - i64:$rB))]>; + (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA, + i32:$rB))]>; def VINSBRX : VXForm_VRT5_RAB5_ins<783, "vinsbrx", [(set v16i8:$vD, - (int_ppc_altivec_vinsbrx v16i8:$vDi, i64:$rA, - i64:$rB))]>; + (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA, + i32:$rB))]>; def VINSHLX : VXForm_VRT5_RAB5_ins<591, "vinshlx", [(set v8i16:$vD, - (int_ppc_altivec_vinshlx v8i16:$vDi, i64:$rA, - i64:$rB))]>; + (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA, + i32:$rB))]>; def VINSHRX : VXForm_VRT5_RAB5_ins<847, "vinshrx", [(set v8i16:$vD, - (int_ppc_altivec_vinshrx v8i16:$vDi, i64:$rA, - i64:$rB))]>; + (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA, + i32:$rB))]>; def VINSWLX : VXForm_VRT5_RAB5_ins<655, "vinswlx", [(set v4i32:$vD, - (int_ppc_altivec_vinswlx v4i32:$vDi, i64:$rA, - i64:$rB))]>; + (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA, + i32:$rB))]>; def VINSWRX : VXForm_VRT5_RAB5_ins<911, "vinswrx", [(set v4i32:$vD, - (int_ppc_altivec_vinswrx v4i32:$vDi, i64:$rA, - i64:$rB))]>; + (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA, + i32:$rB))]>; def VINSDLX : - VXForm_VRT5_RAB5_ins<719, "vinsdlx", - [(set v2i64:$vD, - (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, - i64:$rB))]>; + VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), + "vinsdlx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSDRX : - VXForm_VRT5_RAB5_ins<975, "vinsdrx", - [(set v2i64:$vD, - (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, - i64:$rB))]>; - + VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, 
g8rc:$rA, g8rc:$rB), + "vinsdrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vpdepd $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, @@ -943,6 +973,9 @@ let Predicates = [IsISA3_1] in { [(set v16i8:$vD, (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>; + def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB), + "xvtlsbb $BF, $XB", IIC_VecGeneral, []>; + // The XFormMemOp flag for the following 8 instructions is set on // the instruction format. let mayLoad = 1, mayStore = 0 in { @@ -958,9 +991,56 @@ let Predicates = [IsISA3_1] in { def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>; def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>; } -} - + def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>; + def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>; + def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>; + def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>; + def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>; + def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>; + def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivuq $vD, $vA, $vB", IIC_VecGeneral, []>; + def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>; + def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, []>; + def VCMPEQUQ : VCMP 
<455, "vcmpequq $vD, $vA, $vB" , v1i128>; + def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>; + def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>; + def VCMPEQUQ_rec : VCMPo <455, "vcmpequq. $vD, $vA, $vB" , v1i128>; + def VCMPGTSQ_rec : VCMPo <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>; + def VCMPGTUQ_rec : VCMPo <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>; + def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsq $vD, $vA, $vB", IIC_VecGeneral, []>; + def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmoduq $vD, $vA, $vB", IIC_VecGeneral, []>; + def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB), + "vextsd2q $vD, $vB", IIC_VecGeneral, []>; + def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), + "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>; + def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), + "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>; + def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>; + def VRLQMI : VXForm_1<69, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), + "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>; + def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>; + def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>; + def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>; + def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>; + def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>; + def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>; + def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>; +} //---------------------------- Anonymous Patterns ----------------------------// let Predicates = [IsISA3_1] in { @@ -972,6 +1052,10 @@ let Predicates = [IsISA3_1] in { (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>; def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)), (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, 
imm:$IMM), VRRC))>; + def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, -1)), + (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>; + def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)), + (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>; } let AddedComplexity = 400, Predicates = [PrefixInstrs] in { diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td index 935c3044ae470..858eb0c9fe500 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -158,7 +158,7 @@ def EFDCFSF : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB), def EFDCFSI : EFXForm_2a<753, (outs sperc:$RT), (ins gprc:$RB), "efdcfsi $RT, $RB", IIC_FPDGeneral, - [(set f64:$RT, (sint_to_fp i32:$RB))]>; + [(set f64:$RT, (any_sint_to_fp i32:$RB))]>; def EFDCFSID : EFXForm_2a<739, (outs sperc:$RT), (ins gprc:$RB), "efdcfsid $RT, $RB", IIC_FPDGeneral, @@ -169,7 +169,7 @@ def EFDCFUF : EFXForm_2a<754, (outs sperc:$RT), (ins spe4rc:$RB), def EFDCFUI : EFXForm_2a<752, (outs sperc:$RT), (ins gprc:$RB), "efdcfui $RT, $RB", IIC_FPDGeneral, - [(set f64:$RT, (uint_to_fp i32:$RB))]>; + [(set f64:$RT, (any_uint_to_fp i32:$RB))]>; def EFDCFUID : EFXForm_2a<738, (outs sperc:$RT), (ins gprc:$RB), "efdcfuid $RT, $RB", IIC_FPDGeneral, @@ -197,7 +197,7 @@ def EFDCTSIDZ : EFXForm_2a<747, (outs gprc:$RT), (ins sperc:$RB), def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB), "efdctsiz $RT, $RB", IIC_FPDGeneral, - [(set i32:$RT, (fp_to_sint f64:$RB))]>; + [(set i32:$RT, (any_fp_to_sint f64:$RB))]>; def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB), "efdctuf $RT, $RB", IIC_FPDGeneral, []>; @@ -212,7 +212,7 @@ def EFDCTUIDZ : EFXForm_2a<746, (outs gprc:$RT), (ins sperc:$RB), def EFDCTUIZ : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB), "efdctuiz $RT, $RB", IIC_FPDGeneral, - [(set i32:$RT, (fp_to_uint f64:$RB))]>; + [(set i32:$RT, (any_fp_to_uint f64:$RB))]>; def EFDDIV : EFXForm_1<745, (outs 
sperc:$RT), (ins sperc:$RA, sperc:$RB), "efddiv $RT, $RA, $RB", IIC_FPDivD, @@ -261,14 +261,14 @@ def EFSCFSF : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB), def EFSCFSI : EFXForm_2a<721, (outs spe4rc:$RT), (ins gprc:$RB), "efscfsi $RT, $RB", IIC_FPSGeneral, - [(set f32:$RT, (sint_to_fp i32:$RB))]>; + [(set f32:$RT, (any_sint_to_fp i32:$RB))]>; def EFSCFUF : EFXForm_2a<722, (outs spe4rc:$RT), (ins spe4rc:$RB), "efscfuf $RT, $RB", IIC_FPSGeneral, []>; def EFSCFUI : EFXForm_2a<720, (outs spe4rc:$RT), (ins gprc:$RB), "efscfui $RT, $RB", IIC_FPSGeneral, - [(set f32:$RT, (uint_to_fp i32:$RB))]>; + [(set f32:$RT, (any_uint_to_fp i32:$RB))]>; let isCompare = 1 in { def EFSCMPEQ : EFXForm_3<718, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), @@ -288,7 +288,7 @@ def EFSCTSI : EFXForm_2a<725, (outs gprc:$RT), (ins spe4rc:$RB), def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB), "efsctsiz $RT, $RB", IIC_FPSGeneral, - [(set i32:$RT, (fp_to_sint f32:$RB))]>; + [(set i32:$RT, (any_fp_to_sint f32:$RB))]>; def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB), "efsctuf $RT, $RB", IIC_FPSGeneral, []>; @@ -299,7 +299,7 @@ def EFSCTUI : EFXForm_2a<724, (outs gprc:$RT), (ins spe4rc:$RB), def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB), "efsctuiz $RT, $RB", IIC_FPSGeneral, - [(set i32:$RT, (fp_to_uint f32:$RB))]>; + [(set i32:$RT, (any_fp_to_uint f32:$RB))]>; def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efsdiv $RT, $RA, $RB", IIC_FPDivD, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index 8db1738566ac8..089a2def4c210 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -27,7 +27,6 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) { ExceptionsType = ExceptionHandling::DwarfCFI; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; - 
UseIntegratedAssembler = false; } const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index ae13c00565c43..816206c477dfa 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -189,6 +189,9 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Get byte count of instruction. unsigned Size = Desc.getSize(); + // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded + // instructions for each pseudo, and must be updated when adding new pseudos + // or changing existing ones. if (MI.getOpcode() == RISCV::PseudoCALLReg || MI.getOpcode() == RISCV::PseudoCALL || MI.getOpcode() == RISCV::PseudoTAIL || diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index f0583f6919361..57e7c41c42711 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -215,6 +215,16 @@ def : ProcessorModel<"rocket-rv32", Rocket32Model, []>; def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit]>; +def : ProcessorModel<"sifive-e31", Rocket32Model, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtC]>; + +def : ProcessorModel<"sifive-u54", Rocket64Model, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. 
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index b49c767ff2ca6..26ce16486bd9c 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -86,6 +86,9 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) { bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { + // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded + // instructions for each pseudo, and must be updated when adding new pseudos + // or changing existing ones. switch (MBBI->getOpcode()) { case RISCV::PseudoAtomicLoadNand32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 33db8f231c7db..504355fb8bf88 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/MC/MCContext.h" using namespace llvm; @@ -42,18 +41,24 @@ class RISCVExpandPseudo : public MachineFunctionPass { private: bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool expandAuipcInstPair(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, unsigned SecondOpcode); bool expandLoadLocalAddress(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddress(MachineBasicBlock &MBB, - 
MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool expandLoadTLSIEAddress(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool expandLoadTLSGDAddress(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); }; char RISCVExpandPseudo::ID = 0; @@ -72,7 +77,7 @@ bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= expandMI(MBB, MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); MBBI = NMBBI; } @@ -80,56 +85,76 @@ bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) { } bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) { + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded + // instructions for each pseudo, and must be updated when adding new pseudos + // or changing existing ones. 
switch (MBBI->getOpcode()) { case RISCV::PseudoLLA: - return expandLoadLocalAddress(MBB, MBBI); + return expandLoadLocalAddress(MBB, MBBI, NextMBBI); case RISCV::PseudoLA: - return expandLoadAddress(MBB, MBBI); + return expandLoadAddress(MBB, MBBI, NextMBBI); case RISCV::PseudoLA_TLS_IE: - return expandLoadTLSIEAddress(MBB, MBBI); + return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI); case RISCV::PseudoLA_TLS_GD: - return expandLoadTLSGDAddress(MBB, MBBI); + return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI); } return false; } -bool RISCVExpandPseudo::expandAuipcInstPair(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FlagsHi, - unsigned SecondOpcode) { +bool RISCVExpandPseudo::expandAuipcInstPair( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, + unsigned SecondOpcode) { MachineFunction *MF = MBB.getParent(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); Register DestReg = MI.getOperand(0).getReg(); - Register ScratchReg = - MF->getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + const MachineOperand &Symbol = MI.getOperand(1); - MachineOperand &Symbol = MI.getOperand(1); - Symbol.setTargetFlags(FlagsHi); - MCSymbol *AUIPCSymbol = MF->getContext().createTempSymbol(false); + MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - MachineInstr *MIAUIPC = - BuildMI(MBB, MBBI, DL, TII->get(RISCV::AUIPC), ScratchReg).add(Symbol); - MIAUIPC->setPreInstrSymbol(*MF, AUIPCSymbol); + // Tell AsmPrinter that we unconditionally want the symbol of this label to be + // emitted. 
+ NewMBB->setLabelMustBeEmitted(); - BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) - .addReg(ScratchReg) - .addSym(AUIPCSymbol, RISCVII::MO_PCREL_LO); + MF->insert(++MBB.getIterator(), NewMBB); + BuildMI(NewMBB, DL, TII->get(RISCV::AUIPC), DestReg) + .addDisp(Symbol, 0, FlagsHi); + BuildMI(NewMBB, DL, TII->get(SecondOpcode), DestReg) + .addReg(DestReg) + .addMBB(NewMBB, RISCVII::MO_PCREL_LO); + + // Move all the rest of the instructions to NewMBB. + NewMBB->splice(NewMBB->end(), &MBB, std::next(MBBI), MBB.end()); + // Update machine-CFG edges. + NewMBB->transferSuccessorsAndUpdatePHIs(&MBB); + // Make the original basic block fall-through to the new. + MBB.addSuccessor(NewMBB); + + // Make sure live-ins are correctly attached to this new basic block. + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *NewMBB); + + NextMBBI = MBB.end(); MI.eraseFromParent(); return true; } bool RISCVExpandPseudo::expandLoadLocalAddress( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - return expandAuipcInstPair(MBB, MBBI, RISCVII::MO_PCREL_HI, RISCV::ADDI); + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_PCREL_HI, + RISCV::ADDI); } -bool RISCVExpandPseudo::expandLoadAddress(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) { +bool RISCVExpandPseudo::expandLoadAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineFunction *MF = MBB.getParent(); unsigned SecondOpcode; @@ -142,21 +167,25 @@ bool RISCVExpandPseudo::expandLoadAddress(MachineBasicBlock &MBB, SecondOpcode = RISCV::ADDI; FlagsHi = RISCVII::MO_PCREL_HI; } - return expandAuipcInstPair(MBB, MBBI, FlagsHi, SecondOpcode); + return expandAuipcInstPair(MBB, MBBI, NextMBBI, FlagsHi, SecondOpcode); } bool RISCVExpandPseudo::expandLoadTLSIEAddress( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + 
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineFunction *MF = MBB.getParent(); const auto &STI = MF->getSubtarget(); unsigned SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW; - return expandAuipcInstPair(MBB, MBBI, RISCVII::MO_TLS_GOT_HI, SecondOpcode); + return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GOT_HI, + SecondOpcode); } bool RISCVExpandPseudo::expandLoadTLSGDAddress( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - return expandAuipcInstPair(MBB, MBBI, RISCVII::MO_TLS_GD_HI, RISCV::ADDI); + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GD_HI, + RISCV::ADDI); } } // end of anonymous namespace diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index a0ae05081adcb..7570385e38e3a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -184,6 +184,330 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { return false; } +// Check that it is a SLOI (Shift Left Ones Immediate). 
We first check that +// it is the right node tree: +// +// (OR (SHL RS1, VC2), VC1) +// +// and then we check that VC1, the mask used to fill with ones, is compatible +// with VC2, the shamt: +// +// VC1 == maskTrailingOnes(VC2) + +bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) { + MVT XLenVT = Subtarget->getXLenVT(); + if (N.getOpcode() == ISD::OR) { + SDValue Or = N; + if (Or.getOperand(0).getOpcode() == ISD::SHL) { + SDValue Shl = Or.getOperand(0); + if (isa(Shl.getOperand(1)) && + isa(Or.getOperand(1))) { + if (XLenVT == MVT::i64) { + uint64_t VC1 = Or.getConstantOperandVal(1); + uint64_t VC2 = Shl.getConstantOperandVal(1); + if (VC1 == maskTrailingOnes(VC2)) { + RS1 = Shl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Shl.getOperand(1).getValueType()); + return true; + } + } + if (XLenVT == MVT::i32) { + uint32_t VC1 = Or.getConstantOperandVal(1); + uint32_t VC2 = Shl.getConstantOperandVal(1); + if (VC1 == maskTrailingOnes(VC2)) { + RS1 = Shl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Shl.getOperand(1).getValueType()); + return true; + } + } + } + } + } + return false; +} + +// Check that it is a SROI (Shift Right Ones Immediate). 
We first check that +// it is the right node tree: +// +// (OR (SRL RS1, VC2), VC1) +// +// and then we check that VC1, the mask used to fill with ones, is compatible +// with VC2, the shamt: +// +// VC1 == maskLeadingOnes(VC2) + +bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) { + MVT XLenVT = Subtarget->getXLenVT(); + if (N.getOpcode() == ISD::OR) { + SDValue Or = N; + if (Or.getOperand(0).getOpcode() == ISD::SRL) { + SDValue Srl = Or.getOperand(0); + if (isa(Srl.getOperand(1)) && + isa(Or.getOperand(1))) { + if (XLenVT == MVT::i64) { + uint64_t VC1 = Or.getConstantOperandVal(1); + uint64_t VC2 = Srl.getConstantOperandVal(1); + if (VC1 == maskLeadingOnes(VC2)) { + RS1 = Srl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Srl.getOperand(1).getValueType()); + return true; + } + } + if (XLenVT == MVT::i32) { + uint32_t VC1 = Or.getConstantOperandVal(1); + uint32_t VC2 = Srl.getConstantOperandVal(1); + if (VC1 == maskLeadingOnes(VC2)) { + RS1 = Srl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Srl.getOperand(1).getValueType()); + return true; + } + } + } + } + } + return false; +} + +// Check that it is a RORI (Rotate Right Immediate). We first check that +// it is the right node tree: +// +// (ROTL RS1, VC) +// +// The compiler translates immediate rotations to the right given by the call +// to the rotateright32/rotateright64 intrinsics as rotations to the left. +// Since the rotation to the left can be easily emulated as a rotation to the +// right by negating the constant, there is no encoding for ROLI. 
+// We then select the immediate left rotations as RORI by the complementary +// constant: +// +// Shamt == XLen - VC + +bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) { + MVT XLenVT = Subtarget->getXLenVT(); + if (N.getOpcode() == ISD::ROTL) { + if (isa(N.getOperand(1))) { + if (XLenVT == MVT::i64) { + uint64_t VC = N.getConstantOperandVal(1); + Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N), + N.getOperand(1).getValueType()); + RS1 = N.getOperand(0); + return true; + } + if (XLenVT == MVT::i32) { + uint32_t VC = N.getConstantOperandVal(1); + Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N), + N.getOperand(1).getValueType()); + RS1 = N.getOperand(0); + return true; + } + } + } + return false; +} + + +// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32 +// on RV64). +// SLLIUW is the same as SLLI except for the fact that it clears the bits +// XLEN-1:32 of the input RS1 before shifting. +// We first check that it is the right node tree: +// +// (AND (SHL RS1, VC2), VC1) +// +// We check that VC2, the shamt is less than 32, otherwise the pattern is +// exactly the same as SLLI and we give priority to that. 
+// Eventually we check that that VC1, the mask used to clear the upper 32 bits +// of RS1, is correct: +// +// VC1 == (0xFFFFFFFF << VC2) + +bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) { + if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) { + SDValue And = N; + if (And.getOperand(0).getOpcode() == ISD::SHL) { + SDValue Shl = And.getOperand(0); + if (isa(Shl.getOperand(1)) && + isa(And.getOperand(1))) { + uint64_t VC1 = And.getConstantOperandVal(1); + uint64_t VC2 = Shl.getConstantOperandVal(1); + if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) { + RS1 = Shl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Shl.getOperand(1).getValueType()); + return true; + } + } + } + } + return false; +} + +// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64). +// We first check that it is the right node tree: +// +// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1)) +// +// and then we check that VC1, the mask used to fill with ones, is compatible +// with VC2, the shamt: +// +// VC1 == maskTrailingOnes(VC2) + +bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) { + if (Subtarget->getXLenVT() == MVT::i64 && + N.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast(N.getOperand(1))->getVT() == MVT::i32) { + if (N.getOperand(0).getOpcode() == ISD::OR) { + SDValue Or = N.getOperand(0); + if (Or.getOperand(0).getOpcode() == ISD::SHL) { + SDValue Shl = Or.getOperand(0); + if (isa(Shl.getOperand(1)) && + isa(Or.getOperand(1))) { + uint32_t VC1 = Or.getConstantOperandVal(1); + uint32_t VC2 = Shl.getConstantOperandVal(1); + if (VC1 == maskTrailingOnes(VC2)) { + RS1 = Shl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Shl.getOperand(1).getValueType()); + return true; + } + } + } + } + } + return false; +} + +// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64). 
+// We first check that it is the right node tree: +// +// (OR (SHL RS1, VC2), VC1) +// +// and then we check that VC1, the mask used to fill with ones, is compatible +// with VC2, the shamt: +// +// VC1 == maskLeadingOnes(VC2) + +bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) { + if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) { + SDValue Or = N; + if (Or.getOperand(0).getOpcode() == ISD::SRL) { + SDValue Srl = Or.getOperand(0); + if (isa(Srl.getOperand(1)) && + isa(Or.getOperand(1))) { + uint32_t VC1 = Or.getConstantOperandVal(1); + uint32_t VC2 = Srl.getConstantOperandVal(1); + if (VC1 == maskLeadingOnes(VC2)) { + RS1 = Srl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N), + Srl.getOperand(1).getValueType()); + return true; + } + } + } + } + return false; +} + +// Check that it is a RORIW (i32 Right Rotate Immediate on RV64). +// We first check that it is the right node tree: +// +// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2), +// (SRL (AND (AssertSext RS2, i32), VC3), VC1))) +// +// Then we check that the constant operands respect these constraints: +// +// VC2 == 32 - VC1 +// VC3 == maskLeadingOnes(VC2) +// +// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32 +// and VC3 a 32 bit mask of (32 - VC1) leading ones. 
+ +bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) { + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && + Subtarget->getXLenVT() == MVT::i64 && + cast(N.getOperand(1))->getVT() == MVT::i32) { + if (N.getOperand(0).getOpcode() == ISD::OR) { + SDValue Or = N.getOperand(0); + if (Or.getOperand(0).getOpcode() == ISD::SHL && + Or.getOperand(1).getOpcode() == ISD::SRL) { + SDValue Shl = Or.getOperand(0); + SDValue Srl = Or.getOperand(1); + if (Srl.getOperand(0).getOpcode() == ISD::AND) { + SDValue And = Srl.getOperand(0); + if (isa(Srl.getOperand(1)) && + isa(Shl.getOperand(1)) && + isa(And.getOperand(1))) { + uint32_t VC1 = Srl.getConstantOperandVal(1); + uint32_t VC2 = Shl.getConstantOperandVal(1); + uint32_t VC3 = And.getConstantOperandVal(1); + if (VC2 == (32 - VC1) && + VC3 == maskLeadingOnes(VC2)) { + RS1 = Shl.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N), + Srl.getOperand(1).getValueType()); + return true; + } + } + } + } + } + } + return false; +} + +// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64). +// We first check that it is the right node tree: +// +// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2), +// (SRL (AND (AssertSext RS2, i32), VC3), VC1))) +// +// Then we check that the constant operands respect these constraints: +// +// VC2 == 32 - VC1 +// VC3 == maskLeadingOnes(VC2) +// +// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32 +// and VC3 a 32 bit mask of (32 - VC1) leading ones. 
+ +bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, + SDValue &Shamt) { + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && + Subtarget->getXLenVT() == MVT::i64 && + cast(N.getOperand(1))->getVT() == MVT::i32) { + if (N.getOperand(0).getOpcode() == ISD::OR) { + SDValue Or = N.getOperand(0); + if (Or.getOperand(0).getOpcode() == ISD::SHL && + Or.getOperand(1).getOpcode() == ISD::SRL) { + SDValue Shl = Or.getOperand(0); + SDValue Srl = Or.getOperand(1); + if (Srl.getOperand(0).getOpcode() == ISD::AND) { + SDValue And = Srl.getOperand(0); + if (isa(Srl.getOperand(1)) && + isa(Shl.getOperand(1)) && + isa(And.getOperand(1))) { + uint32_t VC1 = Srl.getConstantOperandVal(1); + uint32_t VC2 = Shl.getConstantOperandVal(1); + uint32_t VC3 = And.getConstantOperandVal(1); + if (VC2 == (32 - VC1) && + VC3 == maskLeadingOnes(VC2)) { + RS1 = Shl.getOperand(0); + RS2 = And.getOperand(0); + Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N), + Srl.getOperand(1).getValueType()); + return true; + } + } + } + } + } + } + return false; +} + // Merge an ADDI into the offset of a load/store instruction where possible. 
// (load (addi base, off1), off2) -> (load base, off1+off2) // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index dcf733ec36751..0ca12510a2308 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -45,6 +45,15 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool SelectAddrFI(SDValue Addr, SDValue &Base); + bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt); + bool SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, SDValue &Shamt); + // Include the pieces autogenerated from the target description. 
#include "RISCVGenDAGISel.inc" diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 91fc69b5bc10e..03d9eefd59d0b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -149,12 +149,27 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); - setOperationAction(ISD::ROTL, XLenVT, Expand); - setOperationAction(ISD::ROTR, XLenVT, Expand); - setOperationAction(ISD::BSWAP, XLenVT, Expand); - setOperationAction(ISD::CTTZ, XLenVT, Expand); - setOperationAction(ISD::CTLZ, XLenVT, Expand); - setOperationAction(ISD::CTPOP, XLenVT, Expand); + if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) { + setOperationAction(ISD::ROTL, XLenVT, Expand); + setOperationAction(ISD::ROTR, XLenVT, Expand); + } + + if (!Subtarget.hasStdExtZbp()) + setOperationAction(ISD::BSWAP, XLenVT, Expand); + + if (!Subtarget.hasStdExtZbb()) { + setOperationAction(ISD::CTTZ, XLenVT, Expand); + setOperationAction(ISD::CTLZ, XLenVT, Expand); + setOperationAction(ISD::CTPOP, XLenVT, Expand); + } + + if (Subtarget.hasStdExtZbp()) + setOperationAction(ISD::BITREVERSE, XLenVT, Legal); + + if (Subtarget.hasStdExtZbt()) { + setOperationAction(ISD::FSHL, XLenVT, Legal); + setOperationAction(ISD::FSHR, XLenVT, Legal); + } ISD::CondCode FPCCToExtend[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index f64e4397dcd3c..d39ec505127c4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -76,10 +76,10 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, break; } - if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && - MI.getOperand(1).getImm() == 0) { - FrameIndex = MI.getOperand(0).getIndex(); - 
return MI.getOperand(2).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } return 0; @@ -471,6 +471,9 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { case TargetOpcode::KILL: case TargetOpcode::DBG_VALUE: return 0; + // These values are determined based on RISCVExpandAtomicPseudoInsts, + // RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the + // pseudos are expanded. case RISCV::PseudoCALLReg: case RISCV::PseudoCALL: case RISCV::PseudoJump: @@ -480,6 +483,26 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { case RISCV::PseudoLA_TLS_IE: case RISCV::PseudoLA_TLS_GD: return 8; + case RISCV::PseudoAtomicLoadNand32: + case RISCV::PseudoAtomicLoadNand64: + return 20; + case RISCV::PseudoMaskedAtomicSwap32: + case RISCV::PseudoMaskedAtomicLoadAdd32: + case RISCV::PseudoMaskedAtomicLoadSub32: + return 28; + case RISCV::PseudoMaskedAtomicLoadNand32: + return 32; + case RISCV::PseudoMaskedAtomicLoadMax32: + case RISCV::PseudoMaskedAtomicLoadMin32: + return 44; + case RISCV::PseudoMaskedAtomicLoadUMax32: + case RISCV::PseudoMaskedAtomicLoadUMin32: + return 36; + case RISCV::PseudoCmpXchg32: + case RISCV::PseudoCmpXchg64: + return 16; + case RISCV::PseudoMaskedCmpXchg32: + return 32; case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: { const MachineFunction &MF = *MI.getParent()->getParent(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index de73c8df93679..7fce37519b93e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -235,13 +235,13 @@ class PseudoMaskedAMOUMinUMax class PseudoMaskedAMOPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), - (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; + (AMOInst GPR:$addr, GPR:$incr, 
GPR:$mask, timm:$ordering)>; class PseudoMaskedAMOMinMaxPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, - imm:$ordering)>; + timm:$ordering)>; def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat; + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; } // Predicates = [HasStdExtA] @@ -387,5 +387,5 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>; def : Pat<(int_riscv_masked_cmpxchg_i64 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; } // Predicates = [HasStdExtA, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td index 34a463626e290..afac509f743d7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -632,3 +632,432 @@ let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] i def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0), (C_ZEXTW GPRC:$rs1)>; } // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64] + +//===----------------------------------------------------------------------===// +// Codegen patterns +//===----------------------------------------------------------------------===// +def SLOIPat : ComplexPattern; +def SROIPat : ComplexPattern; +def RORIPat : ComplexPattern; +def SLLIUWPat : ComplexPattern; +def SLOIWPat : ComplexPattern; +def SROIWPat : ComplexPattern; +def RORIWPat : ComplexPattern; +def FSRIWPat : ComplexPattern; + +let Predicates = [HasStdExtZbbOrZbp] in { +def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>; +def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbbOrZbp] 
+ +let Predicates = [HasStdExtZbb] in { +def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1), + (SLO GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1), + (SRO GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbb] + +let Predicates = [HasStdExtZbbOrZbp] in { +def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>; +def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>; +def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>; +def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbbOrZbp] + +let Predicates = [HasStdExtZbs, IsRV32] in +def : Pat<(and (xor (shl 1, (and GPR:$rs2, 31)), -1), GPR:$rs1), + (SBCLR GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbs, IsRV64] in +def : Pat<(and (xor (shl 1, (and GPR:$rs2, 63)), -1), GPR:$rs1), + (SBCLR GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbs] in +def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (SBCLR GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbs, IsRV32] in +def : Pat<(or (shl 1, (and GPR:$rs2, 31)), GPR:$rs1), + (SBSET GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbs, IsRV64] in +def : Pat<(or (shl 1, (and GPR:$rs2, 63)), GPR:$rs1), + (SBSET GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbs, IsRV32] in +def : Pat<(xor (shl 1, (and GPR:$rs2, 31)), GPR:$rs1), + (SBINV GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbs, IsRV64] in +def : Pat<(xor (shl 1, (and GPR:$rs2, 63)), GPR:$rs1), + (SBINV GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbs, IsRV32] in +def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 31)), 1), + (SBEXT GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbs, IsRV64] in +def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 63)), 1), + (SBEXT GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbb] in { +def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt), + (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>; +def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt), + (SROI 
GPR:$rs1, uimmlog2xlen:$shamt)>; +} // Predicates = [HasStdExtZbb] + +// There's no encoding for roli in the current version of the 'B' extension +// (v0.92) as it can be implemented with rori by negating the immediate. +// For this reason we pattern-match only against rori[w]. +let Predicates = [HasStdExtZbbOrZbp] in +def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt), + (RORI GPR:$rs1, uimmlog2xlen:$shamt)>; + +// We don't pattern-match sbclri[w], sbseti[w], sbinvi[w] because they are +// pattern-matched by simple andi, ori, and xori. +let Predicates = [HasStdExtZbs] in +def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)), + (SBEXTI GPR:$rs1, uimmlog2xlen:$shamt)>; + +let Predicates = [HasStdExtZbp, IsRV32] in { +def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))), + (GORCI GPR:$rs1, (i32 1))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC))), + (GORCI GPR:$rs1, (i32 2))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0))), + (GORCI GPR:$rs1, (i32 4))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00))), + (GORCI GPR:$rs1, (i32 8))>; +def : Pat<(or (or (srl GPR:$rs1, (i32 16)), GPR:$rs1), + (shl GPR:$rs1, (i32 16))), + (GORCI GPR:$rs1, (i32 16))>; +} // Predicates = [HasStdExtZbp, IsRV32] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA))), + (GORCI GPR:$rs1, (i64 1))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC))), + (GORCI GPR:$rs1, (i64 2))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 4)), (i64 
0x0F0F0F0F0F0F0F0F)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0))), + (GORCI GPR:$rs1, (i64 4))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00))), + (GORCI GPR:$rs1, (i64 8))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000))), + (GORCI GPR:$rs1, (i64 16))>; +def : Pat<(or (or (srl GPR:$rs1, (i64 32)), GPR:$rs1), + (shl GPR:$rs1, (i64 32))), + (GORCI GPR:$rs1, (i64 32))>; +} // Predicates = [HasStdExtZbp, IsRV64] + +let Predicates = [HasStdExtZbp, IsRV32] in { +def : Pat<(or (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA)), + (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555))), + (GREVI GPR:$rs1, (i32 1))>; +def : Pat<(or (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC)), + (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333))), + (GREVI GPR:$rs1, (i32 2))>; +def : Pat<(or (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0)), + (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F))), + (GREVI GPR:$rs1, (i32 4))>; +def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)), + (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))), + (GREVI GPR:$rs1, (i32 8))>; +def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>; +def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))), + (GREVI GPR:$rs1, (i32 16))>; +def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>; +def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>; +def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>; +} // Predicates = [HasStdExtZbp, IsRV32] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA)), + (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555))), + (GREVI GPR:$rs1, (i64 1))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC)), + (and (srl GPR:$rs1, (i64 2)), (i64 
0x3333333333333333))), + (GREVI GPR:$rs1, (i64 2))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0)), + (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F))), + (GREVI GPR:$rs1, (i64 4))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)), + (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF))), + (GREVI GPR:$rs1, (i64 8))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)), + (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))), + (GREVI GPR:$rs1, (i64 16))>; +def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))), + (GREVI GPR:$rs1, (i64 32))>; +def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>; +def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>; +def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>; +} // Predicates = [HasStdExtZbp, IsRV64] + +let Predicates = [HasStdExtZbt] in { +def : Pat<(or (and (xor GPR:$rs2, -1), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)), + (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(fshl GPR:$rs1, GPR:$rs2, GPR:$rs3), + (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(fshr GPR:$rs1, GPR:$rs2, GPR:$rs3), + (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(fshr GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt), + (FSRI GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>; +} // Predicates = [HasStdExtZbt] + +let Predicates = [HasStdExtZbb] in { +def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>; +def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>; +def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>; +} // Predicates = [HasStdExtZbb] + +let Predicates = [HasStdExtZbb, IsRV32] in +def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>; +let Predicates = [HasStdExtZbb, IsRV64] in +def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>; + +let Predicates = [HasStdExtZbb, IsRV32] in +def : Pat<(sra (shl GPR:$rs1, (i32 
16)), (i32 16)), (SEXTH GPR:$rs1)>; +let Predicates = [HasStdExtZbb, IsRV64] in +def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>; + +let Predicates = [HasStdExtZbb] in { +def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>; +def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2), + (MIN GPR:$rs1, GPR:$rs2)>; +def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>; +def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2), + (MAX GPR:$rs1, GPR:$rs2)>; +def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>; +def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2), + (MINU GPR:$rs1, GPR:$rs2)>; +def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>; +def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2), + (MAXU GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbb] + +let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))), + (PACK GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))), + (PACK GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))), + (PACKU GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))), + (PACKU GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp] in +def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00), + (and GPR:$rs1, 0x00FF)), + (PACKH GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtZbp, IsRV32] in { +def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)), + (and GPR:$rs1, (i32 0xFF0000FF))), + (and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))), + (SHFLI GPR:$rs1, (i32 8))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 
0x0F000F00)), + (and GPR:$rs1, (i32 0xF00FF00F))), + (and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))), + (SHFLI GPR:$rs1, (i32 4))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)), + (and GPR:$rs1, (i32 0xC3C3C3C3))), + (and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))), + (SHFLI GPR:$rs1, (i32 2))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)), + (and GPR:$rs1, (i32 0x99999999))), + (and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))), + (SHFLI GPR:$rs1, (i32 1))>; +} // Predicates = [HasStdExtZbp, IsRV32] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)), + (and GPR:$rs1, (i64 0xFFFF00000000FFFF))), + (and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))), + (SHFLI GPR:$rs1, (i64 16))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)), + (and GPR:$rs1, (i64 0xFF0000FFFF0000FF))), + (and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))), + (SHFLI GPR:$rs1, (i64 8))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)), + (and GPR:$rs1, (i64 0xF00FF00FF00FF00F))), + (and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))), + (SHFLI GPR:$rs1, (i64 4))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)), + (and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))), + (and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))), + (SHFLI GPR:$rs1, (i64 2))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)), + (and GPR:$rs1, (i64 0x9999999999999999))), + (and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))), + (SHFLI GPR:$rs1, (i64 1))>; +} // Predicates = [HasStdExtZbp, IsRV64] + +let Predicates = [HasStdExtZbb, IsRV64] in { +def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)), + (ADDIWU GPR:$rs, simm12:$simm12)>; +def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt), + (SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>; +def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 
0xFFFFFFFF)), + (ADDWU GPR:$rs1, GPR:$rs2)>; +def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)), + (SUBWU GPR:$rs1, GPR:$rs2)>; +def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))), + (ADDUW GPR:$rs1, GPR:$rs2)>; +def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))), + (SUBUW GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1), + (SLOW GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1), + (SROW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)), + (riscv_srlw (assertsexti32 GPR:$rs1), + (sub (i64 0), (assertsexti32 GPR:$rs2)))), + (ROLW GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), + (sub (i64 0), (assertsexti32 GPR:$rs2))), + (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))), + (RORW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] + +let Predicates = [HasStdExtZbs, IsRV64] in { +def : Pat<(and (xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)), -1), + (assertsexti32 GPR:$rs1)), + (SBCLRW GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (riscv_sllw 1, (assertsexti32 GPR:$rs2)), + (assertsexti32 GPR:$rs1)), + (SBSETW GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)), + (assertsexti32 GPR:$rs1)), + (SBINVW GPR:$rs1, GPR:$rs2)>; +def : Pat<(and (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)), + 1), + (SBEXTW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbs, IsRV64] + +let Predicates = [HasStdExtZbb, IsRV64] in { +def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt), + (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>; +def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt), + (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>; +} // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt), 
+ (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>; + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA))), + i32), + (GORCIW GPR:$rs1, (i64 1))>; +def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC))), + i32), + (GORCIW GPR:$rs1, (i64 2))>; +def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0))), + i32), + (GORCIW GPR:$rs1, (i64 4))>; +def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00))), + i32), + (GORCIW GPR:$rs1, (i64 8))>; +def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000))), + i32), + (GORCIW GPR:$rs1, (i64 16))>; +def : Pat<(sext_inreg (or (or (srl (and GPR:$rs1, (i64 0xFFFF0000)), (i64 16)), + GPR:$rs1), + (shl GPR:$rs1, (i64 16))), i32), + (GORCIW GPR:$rs1, (i64 16))>; + +def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA)), + (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555))), + i32), + (GREVIW GPR:$rs1, (i64 1))>; +def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC)), + (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333))), + i32), + (GREVIW GPR:$rs1, (i64 2))>; +def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0)), + (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F))), + i32), + (GREVIW GPR:$rs1, (i64 4))>; +def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00)), + (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF))), + i32), + (GREVIW GPR:$rs1, (i64 8))>; +def : Pat<(sext_inreg (or (shl GPR:$rs1, (i64 16)), + (srl (and GPR:$rs1, 0xFFFF0000), (i64 16))), i32), + (GREVIW GPR:$rs1, (i64 16))>; +def : Pat<(sra 
(bswap GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 24))>; +def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>; +} // Predicates = [HasStdExtZbp, IsRV64] + +let Predicates = [HasStdExtZbt, IsRV64] in { +def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31), + (i64 0), + (i64 17), + (assertsexti32 GPR:$rs1), + (or (riscv_sllw (assertsexti32 GPR:$rs1), + (and (assertsexti32 GPR:$rs3), 31)), + (riscv_srlw (assertsexti32 GPR:$rs2), + (sub (i64 32), + (assertsexti32 GPR:$rs3))))), + (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31), + (i64 0), + (i64 17), + (assertsexti32 GPR:$rs2), + (or (riscv_sllw (assertsexti32 GPR:$rs1), + (sub (i64 32), + (assertsexti32 GPR:$rs3))), + (riscv_srlw (assertsexti32 GPR:$rs2), + (and (assertsexti32 GPR:$rs3), 31)))), + (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt), + (FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>; +} // Predicates = [HasStdExtZbt, IsRV64] + +let Predicates = [HasStdExtZbb, IsRV64] in { +def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)), + (CLZW GPR:$rs1)>; +// We don't pattern-match CTZW here as it has the same pattern and result as +// RV64 CTZ +def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>; +} // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)), + (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)), + i32), + (PACKW GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), + (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000), + (i64 16))), + (PACKUW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp index 5bd09a546114f..4d1f47da209d0 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp @@ -16,6 +16,7 @@ #include "RISCVTargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "riscv-isel" diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp index 8ddcf757c97e7..b1dbcfa7f7387 100644 --- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -121,9 +121,6 @@ bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, case MachineOperand::MO_ConstantPoolIndex: MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); break; - case MachineOperand::MO_MCSymbol: - MCOp = lowerSymbolOperand(MO, MO.getMCSymbol(), AP); - break; } return true; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 63f607a9c352c..75683e2fd8e96 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -173,6 +173,7 @@ void RISCVPassConfig::addPreSched2() {} void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } void RISCVPassConfig::addPreEmitPass2() { + addPass(createRISCVExpandPseudoPass()); // Schedule the expansion of AMOs at the last possible moment, avoiding the // possibility for other passes to break the requirements for forward // progress in the LR/SC block. 
@@ -180,6 +181,5 @@ void RISCVPassConfig::addPreEmitPass2() { } void RISCVPassConfig::addPreRegAlloc() { - addPass(createRISCVExpandPseudoPass()); addPass(createRISCVMergeBaseOffsetOptPass()); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 779e921c1d949..23f8b4f78bbdd 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -92,7 +92,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, return wasm::R_WASM_TABLE_INDEX_SLEB; return wasm::R_WASM_MEMORY_ADDR_SLEB; case WebAssembly::fixup_sleb128_i64: - assert(SymA.isData()); + if (SymA.isFunction()) + return wasm::R_WASM_TABLE_INDEX_SLEB64; return wasm::R_WASM_MEMORY_ADDR_SLEB64; case WebAssembly::fixup_uleb128_i32: if (SymA.isGlobal()) @@ -119,6 +120,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, } return wasm::R_WASM_MEMORY_ADDR_I32; case FK_Data_8: + if (SymA.isFunction()) + return wasm::R_WASM_TABLE_INDEX_I64; assert(SymA.isData()); return wasm::R_WASM_MEMORY_ADDR_I64; default: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a9b9eceb41304..c6519fafbc491 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -156,11 +156,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // There is no i8x16.mul instruction setOperationAction(ISD::MUL, MVT::v16i8, Expand); - // There are no vector select instructions - for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, - MVT::v2f64}) - setOperationAction(Op, T, Expand); + // There is no vector conditional select instruction + for (auto T : 
{MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) + setOperationAction(ISD::SELECT_CC, T, Expand); // Expand integer operations supported for scalars but not SIMD for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, @@ -442,6 +441,19 @@ static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults, const MCInstrDesc &MCID = TII.get(CallOp); MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL)); + // See if we must truncate the function pointer. + // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers + // as 64-bit for uniformity with other pointer types. + if (IsIndirect && MF.getSubtarget().hasAddr64()) { + Register Reg32 = + MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + auto &FnPtr = CallParams.getOperand(0); + BuildMI(*BB, CallResults.getIterator(), DL, + TII.get(WebAssembly::I32_WRAP_I64), Reg32) + .addReg(FnPtr.getReg()); + FnPtr.setReg(Reg32); + } + // Move the function pointer to the end of the arguments for indirect calls if (IsIndirect) { auto FnPtr = CallParams.getOperand(0); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 5ff0d73534a6a..08b964542b5bd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -328,19 +328,25 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm), } // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), - (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC]>; + (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr32]>; +def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)), + (CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>; def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), - (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>; + (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC, 
HasAddr32]>; def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)), - (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>; + (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>; +def : Pat<(i64 (WebAssemblywrapperPIC tglobaladdr:$addr)), + (CONST_I64 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr64]>; def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), - (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC]>; + (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>; def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), - (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC]>; + (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr32]>; +def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)), + (CONST_I64 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr64]>; def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>; def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 4f3da2f35c61d..b603701ab930e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -574,6 +574,47 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in (!cast("BITSELECT_"#vec_t) V128:$v1, V128:$v2, V128:$c)>; +// Also implement vselect in terms of bitselect +foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64], + [v4f32, v4i32], [v2f64, v2i64]] in + def : Pat<(types[0] (vselect + (types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2) + )), + (!cast("BITSELECT_"#types[0]) + V128:$v1, V128:$v2, V128:$c + )>; + +// MVP select on v128 values +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +defm SELECT_#vec_t : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), + (outs), (ins), + [(set V128:$dst, + (select I32:$cond, + (vec_t V128:$lhs), (vec_t V128:$rhs) + ) + )], + 
"v128.select\t$dst, $lhs, $rhs, $cond", + "v128.select", 0x1b>; + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. +def : Pat<(select + (i32 (setne I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs) + ), + (!cast("SELECT_"#vec_t) + V128:$lhs, V128:$rhs, I32:$cond + )>; + +// And again, this time with seteq instead of setne and the arms reversed. +def : Pat<(select + (i32 (seteq I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs) + ), + (!cast("SELECT_"#vec_t) + V128:$rhs, V128:$lhs, I32:$cond + )>; +} // foreach vec_t + //===----------------------------------------------------------------------===// // Integer unary arithmetic //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 0573d4eec0599..a3014b2aba92c 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -332,6 +332,7 @@ class X86AsmParser : public MCTargetAsmParser { IES_PLUS, IES_MINUS, IES_OFFSET, + IES_CAST, IES_NOT, IES_MULTIPLY, IES_DIVIDE, @@ -358,6 +359,7 @@ class X86AsmParser : public MCTargetAsmParser { bool MemExpr; bool OffsetOperator; SMLoc OffsetOperatorLoc; + StringRef CurType; bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { if (Sym) { @@ -385,6 +387,7 @@ class X86AsmParser : public MCTargetAsmParser { unsigned getScale() { return Scale; } const MCExpr *getSym() { return Sym; } StringRef getSymName() { return SymName; } + StringRef getType() { return CurType; } int64_t getImm() { return Imm + IC.execute(); } bool isValidEndState() { return State == IES_RBRAC || State == IES_INTEGER; @@ -630,6 +633,7 @@ class X86AsmParser : public MCTargetAsmParser { default: State = IES_ERROR; break; + case IES_CAST: case IES_PLUS: case IES_MINUS: 
case IES_NOT: @@ -742,6 +746,7 @@ class X86AsmParser : public MCTargetAsmParser { IC.pushOperator(IC_PLUS); break; case IES_INIT: + case IES_CAST: assert(!BracCount && "BracCount should be zero on parsing's start"); State = IES_LBRAC; break; @@ -814,6 +819,7 @@ class X86AsmParser : public MCTargetAsmParser { case IES_INTEGER: case IES_OFFSET: case IES_REGISTER: + case IES_RBRAC: case IES_RPAREN: State = IES_RPAREN; IC.pushOperator(IC_RPAREN); @@ -846,6 +852,19 @@ class X86AsmParser : public MCTargetAsmParser { } return false; } + void onCast(StringRef Type) { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_LPAREN: + setType(Type); + State = IES_CAST; + break; + } + } + void setType(StringRef Type) { CurType = Type; } }; bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, @@ -1632,6 +1651,18 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); UpdateLocLex = false; + // (MASM only) PTR operator + if (Parser.isParsingMasm()) { + const AsmToken &NextTok = getLexer().peekTok(); + if (NextTok.is(AsmToken::Identifier) && + NextTok.getIdentifier().equals_lower("ptr")) { + SM.onCast(Identifier); + // Eat type and PTR. + consumeToken(); + End = consumeToken(); + break; + } + } // Register, or (MASM only) . 
unsigned Reg; if (Tok.is(AsmToken::Identifier)) { @@ -1641,27 +1672,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { break; } if (Parser.isParsingMasm()) { - const std::pair RegField = + const std::pair IDField = Tok.getString().split('.'); - const StringRef RegName = RegField.first, Field = RegField.second; - SMLoc RegEndLoc = - SMLoc::getFromPointer(RegName.data() + RegName.size()); + const StringRef ID = IDField.first, Field = IDField.second; + SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size()); if (!Field.empty() && - !MatchRegisterByName(Reg, RegName, IdentLoc, RegEndLoc)) { + !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) { if (SM.onRegister(Reg, ErrMsg)) return Error(IdentLoc, ErrMsg); + StringRef Type; + unsigned Offset = 0; SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); - const std::pair BaseMember = Field.split('.'); - const StringRef Base = BaseMember.first, Member = BaseMember.second; - - unsigned Offset; - if (Parser.LookUpFieldOffset(Base, Member, Offset)) + if (Parser.lookUpField(Field, Type, Offset)) return Error(FieldStartLoc, "unknown offset"); else if (SM.onPlus(ErrMsg)) return Error(getTok().getLoc(), ErrMsg); else if (SM.onInteger(Offset, ErrMsg)) return Error(IdentLoc, ErrMsg); + SM.setType(Type); End = consumeToken(); break; @@ -1680,7 +1709,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { const MCExpr *Val; if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { // MS Dot Operator expression - if (Identifier.count('.') && PrevTK == AsmToken::RBrac) { + if (Identifier.count('.') && + (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) { if (ParseIntelDotOperator(SM, End)) return true; break; @@ -1915,9 +1945,11 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) { } /// Parse the '.' operator. 
-bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) { +bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, + SMLoc &End) { const AsmToken &Tok = getTok(); - unsigned Offset; + StringRef Type; + unsigned Offset = 0; // Drop the optional '.'. StringRef DotDispStr = Tok.getString(); @@ -1933,8 +1965,9 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) Tok.is(AsmToken::Identifier)) { const std::pair BaseMember = DotDispStr.split('.'); const StringRef Base = BaseMember.first, Member = BaseMember.second; - if (getParser().LookUpFieldOffset(SM.getSymName(), DotDispStr, Offset) && - getParser().LookUpFieldOffset(Base, Member, Offset) && + if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) && + getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) && + getParser().lookUpField(DotDispStr, Type, Offset) && (!SemaCallback || SemaCallback->LookupInlineAsmField(Base, Member, Offset))) return Error(Tok.getLoc(), "Unable to lookup field reference!"); @@ -1947,6 +1980,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) while (Tok.getLoc().getPointer() < DotExprEndLoc) Lex(); SM.addImm(Offset); + SM.setType(Type); return false; } diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 5cf4516ede974..e32335331879e 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -463,7 +463,14 @@ struct X86Operand final : public MCParsedAsmOperand { bool isGR32orGR64() const { return Kind == Register && (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) || - X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); + X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); + } + + bool isGR16orGR32orGR64() const { + return Kind == Register && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(getReg()) || + 
X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); } bool isVectorReg() const { @@ -520,6 +527,15 @@ struct X86Operand final : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(RegNo)); } + void addGR16orGR32orGR64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + MCRegister RegNo = getReg(); + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo)) + RegNo = getX86SubSuperRegister(RegNo, 16); + Inst.addOperand(MCOperand::createReg(RegNo)); + } + void addAVX512RCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index dc1ff72add491..8ca6dac036754 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1072,10 +1072,20 @@ def : ProcessorModel<"pentium-m", GenericPostRAModel, FeatureCMOV, FeatureInsertVZEROUPPER]>; foreach P = ["pentium4", "pentium4m"] in { - def : ProcessorModel; + + // Since 'pentium4' is the default 32-bit CPU on Linux and Windows, + // give it more modern tunings. + // FIXME: This wouldn't be needed if we supported mtune. + def : ProcessorModel; + FeatureCMOV, FeatureInsertVZEROUPPER, + FeatureSlow3OpsLEA, FeatureSlowDivide64, + FeatureSlowIncDec, FeatureMacroFusion]>; } // Intel Quark. 
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp index 9ac401bb02537..4242790389219 100644 --- a/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -16,8 +16,11 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/Debug.h" @@ -111,6 +114,12 @@ class FixupLEAPass : public MachineFunctionPass { MachineFunctionProperties::Property::NoVRegs); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: TargetSchedModel TSM; const X86InstrInfo *TII = nullptr; @@ -205,21 +214,27 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { TSM.init(&ST); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); + auto *PSI = &getAnalysis().getPSI(); + auto *MBFI = (PSI && PSI->hasProfileSummary()) + ? &getAnalysis().getBFI() + : nullptr; LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";); for (MachineBasicBlock &MBB : MF) { // First pass. Try to remove or optimize existing LEAs. + bool OptIncDecPerBB = + OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { if (!isLEA(I->getOpcode())) continue; - if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP)) + if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP)) continue; if (IsSlowLEA) processInstructionForSlowLEA(I, MBB); else if (IsSlow3OpsLEA) - processInstrForSlow3OpLEA(I, MBB, OptIncDec); + processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB); } // Second pass for creating LEAs. 
This may reverse some of the diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index c1bd80c41f131..c7ca6fb2a4fcf 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -479,6 +479,29 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, .addCFIIndex(CFIIndex); } +/// Emits Dwarf Info specifying offsets of callee saved registers and +/// frame pointer. This is called only when basic block sections are enabled. +void X86FrameLowering::emitCalleeSavedFrameMoves( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + MachineFunction &MF = *MBB.getParent(); + if (!hasFP(MF)) { + emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); + return; + } + const MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + const unsigned FramePtr = TRI->getFrameRegister(MF); + const unsigned MachineFramePtr = + STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64)) + : FramePtr; + unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true); + // Offset = space for return address + size of the frame pointer itself. + unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 
8 : 4); + BuildCFI(MBB, MBBI, DebugLoc{}, + MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset)); + emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); +} + void X86FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const { diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 9326dc9e959ac..c0b4be95f88d3 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -58,9 +58,14 @@ class X86FrameLowering : public TargetFrameLowering { void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override; + void + emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const override; + void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, bool IsPrologue) const; + const DebugLoc &DL, + bool IsPrologue) const override; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index fb285376c5808..3cd80cb04ab84 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -160,10 +160,6 @@ namespace { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// If true, selector should try to optimize for code size instead of - /// performance. - bool OptForSize; - /// If true, selector should try to optimize for minimum code size. 
bool OptForMinSize; @@ -172,7 +168,7 @@ namespace { public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false), + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForMinSize(false), IndirectTlsSegRefs(false) {} StringRef getPassName() const override { @@ -186,9 +182,8 @@ namespace { "indirect-tls-seg-refs"); // OptFor[Min]Size are used in pattern predicates that isel is matching. - OptForSize = MF.getFunction().hasOptSize(); OptForMinSize = MF.getFunction().hasMinSize(); - assert((!OptForMinSize || OptForSize) && + assert((!OptForMinSize || MF.getFunction().hasOptSize()) && "OptForMinSize implies OptForSize"); SelectionDAGISel::runOnMachineFunction(MF); @@ -4557,7 +4552,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // the patterns on the add/sub/and/or/xor with immediate paterns in the // tablegen files to check immediate use count without making the patterns // unavailable to the fast-isel table. - if (!OptForSize) + if (!CurDAG->shouldOptForSize()) break; // Only handle i8/i16/i32/i64. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 721b262aa433b..bb32a17bcc122 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35043,7 +35043,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, continue; } if (M == SM_SentinelZero) { - PSHUFBMask.push_back(DAG.getConstant(255, DL, MVT::i8)); + PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); continue; } M = Ratio * M + i % Ratio; @@ -35074,7 +35074,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, continue; } if (M == SM_SentinelZero) { - VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8)); + VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); continue; } M = Ratio * M + i % Ratio; @@ -36464,9 +36464,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, (V.getOpcode() == X86ISD::PSHUFLW || V.getOpcode() == X86ISD::PSHUFHW) && V.getOpcode() != N.getOpcode() && - V.hasOneUse()) { + V.hasOneUse() && V.getOperand(0).hasOneUse()) { SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); - if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { + if (D.getOpcode() == X86ISD::PSHUFD) { SmallVector VMask = getPSHUFShuffleMask(V); SmallVector DMask = getPSHUFShuffleMask(D); int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; @@ -36903,10 +36903,11 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // insert into a zero vector. This helps get VZEXT_MOVL closer to // scalar_to_vectors where 256/512 are canonicalized to an insert and a // 128-bit scalar_to_vector. This reduces the number of isel patterns. 
- if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps()) { + if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() && + N->getOperand(0).hasOneUse()) { SDValue V = peekThroughOneUseBitcasts(N->getOperand(0)); - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.hasOneUse() && + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) { SDValue In = V.getOperand(1); MVT SubVT = @@ -44522,6 +44523,8 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd)) return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); + // NOTE: isHorizontalBinOp may have changed LHS/RHS variables. + return SDValue(); } @@ -46128,14 +46131,23 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, if (!TLI.isTypeLegal(VT)) return SDValue(); - EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) - return SDValue(); - SDValue A = N->getOperand(IsStrict ? 1 : 0); SDValue B = N->getOperand(IsStrict ? 2 : 1); SDValue C = N->getOperand(IsStrict ? 3 : 2); + // If the operation allows fast-math and the target does not support FMA, + // split this into mul+add to avoid libcall(s). 
+ SDNodeFlags Flags = N->getFlags(); + if (!IsStrict && Flags.hasAllowReassociation() && + TLI.isOperationExpand(ISD::FMA, VT)) { + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags); + return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags); + } + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) + return SDValue(); + auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOperations = !DCI.isBeforeLegalizeOps(); @@ -47603,6 +47615,30 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, PMADDBuilder); } +static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + EVT VT = N->getValueType(0); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + bool IsAdd = N->getOpcode() == ISD::ADD; + assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode"); + + if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || + VT == MVT::v8i32) && + Subtarget.hasSSSE3() && + isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd)) { + auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef Ops) { + return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB, + DL, Ops[0].getValueType(), Ops); + }; + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, + HOpBuilder); + } + + return SDValue(); +} + static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -47616,17 +47652,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, return MAdd; // Try to synthesize horizontal adds from adds of shuffles. 
- if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || - VT == MVT::v8i32) && - Subtarget.hasSSSE3() && - isHorizontalBinOp(Op0, Op1, DAG, Subtarget, true)) { - auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef Ops) { - return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops); - }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, - HADDBuilder); - } + if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget)) + return V; // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into // (sub Y, (sext (vXi1 X))). @@ -47799,18 +47826,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } // Try to synthesize horizontal subs from subs of shuffles. - EVT VT = N->getValueType(0); - if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || - VT == MVT::v8i32) && - Subtarget.hasSSSE3() && - isHorizontalBinOp(Op0, Op1, DAG, Subtarget, false)) { - auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef Ops) { - return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops); - }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, - HSUBBuilder); - } + if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget)) + return V; // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 23841c3d7e506..3ea0ae8a88407 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -640,10 +640,17 @@ class ImmSExtAsmOperandClass : AsmOperandClass { def X86GR32orGR64AsmOperand : AsmOperandClass { let Name = "GR32orGR64"; } - def GR32orGR64 : RegisterOperand { let ParserMatchClass = X86GR32orGR64AsmOperand; } + +def X86GR16orGR32orGR64AsmOperand : AsmOperandClass { + let Name = "GR16orGR32orGR64"; +} +def GR16orGR32orGR64 : RegisterOperand { + let ParserMatchClass = X86GR16orGR32orGR64AsmOperand; +} + 
def AVX512RCOperand : AsmOperandClass { let Name = "AVX512RC"; } diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index c23bc7ebbf702..13659b5c456e3 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -207,45 +207,41 @@ let mayLoad = 1 in def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), +def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16orGR32orGR64:$src), "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. let mayLoad = 1 in def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lar{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; -def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), +def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR16orGR32orGR64:$src), "lar{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; -// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo. let mayLoad = 1 in def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; -def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), +def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src), "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; -// i16mem operand in LSL32rm and GR32 operand in LSL32rr is not a typo. 
let mayLoad = 1 in def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), +def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16orGR32orGR64:$src), "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -// i16mem operand in LSL64rm and GR32 operand in LSL64rr is not a typo. let mayLoad = 1 in def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; -def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), +def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR16orGR32orGR64:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; let mayLoad = 1 in def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; -def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), +def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp index 4c6bd0ccc2cd2..ec81b07f9e5f0 100644 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp @@ -58,6 +58,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 7344116e14af6..9a9ea245f7027 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -249,25 +249,10 @@ 
X86TargetMachine::getSubtargetImpl(const Function &F) const { : (StringRef)TargetFS; SmallString<512> Key; - Key.reserve(CPU.size() + FS.size()); - Key += CPU; - Key += FS; - - // FIXME: This is related to the code below to reset the target options, - // we need to know whether or not the soft float flag is set on the - // function before we can generate a subtarget. We also need to use - // it as a key for the subtarget since that can be the only difference - // between two functions. - bool SoftFloat = - F.getFnAttribute("use-soft-float").getValueAsString() == "true"; - // If the soft float attribute is set on the function turn on the soft float - // subtarget feature. - if (SoftFloat) - Key += FS.empty() ? "+soft-float" : ",+soft-float"; - - // Keep track of the key width after all features are added so we can extract - // the feature string out later. - unsigned CPUFSWidth = Key.size(); + // The additions here are ordered so that the definitely short strings are + // added first so we won't exceed the small size. We append the + // much longer FS string at the end so that we only heap allocate at most + // one time. // Extract prefer-vector-width attribute. unsigned PreferVectorWidthOverride = 0; @@ -275,7 +260,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString(); unsigned Width; if (!Val.getAsInteger(0, Width)) { - Key += ",prefer-vector-width="; + Key += "prefer-vector-width="; Key += Val; PreferVectorWidthOverride = Width; } @@ -288,16 +273,35 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { F.getFnAttribute("min-legal-vector-width").getValueAsString(); unsigned Width; if (!Val.getAsInteger(0, Width)) { - Key += ",min-legal-vector-width="; + Key += "min-legal-vector-width="; Key += Val; RequiredVectorWidth = Width; } } - // Extracted here so that we make sure there is backing for the StringRef. 
If - // we assigned earlier, its possible the SmallString reallocated leaving a - // dangling StringRef. - FS = Key.slice(CPU.size(), CPUFSWidth); + // Add CPU to the Key. + Key += CPU; + + // Keep track of the start of the feature portion of the string. + unsigned FSStart = Key.size(); + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether or not the soft float flag is set on the + // function before we can generate a subtarget. We also need to use + // it as a key for the subtarget since that can be the only difference + // between two functions. + bool SoftFloat = + F.getFnAttribute("use-soft-float").getValueAsString() == "true"; + // If the soft float attribute is set on the function turn on the soft float + // subtarget feature. + if (SoftFloat) + Key += FS.empty() ? "+soft-float" : "+soft-float,"; + + Key += FS; + + // We may have added +soft-float to the features so move the StringRef to + // point to the full string in the Key. + FS = Key.substr(FSStart); auto &I = SubtargetMap[Key]; if (!I) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index b97072893cc2e..d462e1f96ca26 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -105,9 +105,9 @@ class X86TTIImpl : public BasicTTIImplBase { /// \name Cache TTI Implementation /// @{ llvm::Optional getCacheSize( - TargetTransformInfo::CacheLevel Level) const; + TargetTransformInfo::CacheLevel Level) const override; llvm::Optional getCacheAssociativity( - TargetTransformInfo::CacheLevel Level) const; + TargetTransformInfo::CacheLevel Level) const override; /// @} /// \name Vector TTI Implementations diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 6d7f08bfbe07c..f96dac5f3515c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -23,6 +23,7 @@ #include 
"llvm/IR/NoFolder.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -78,6 +79,12 @@ static cl::opt "wrappers for non-exact definitions."), cl::init(false)); +static cl::list + SeedAllowList("attributor-seed-allow-list", cl::Hidden, + cl::desc("Comma seperated list of attrbute names that are " + "allowed to be seeded."), + cl::ZeroOrMore, cl::CommaSeparated); + /// Logic operators for the change status enum class. /// ///{ @@ -1180,6 +1187,9 @@ ChangeStatus Attributor::cleanupIR() { } } + LLVM_DEBUG(dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() + << "\n"); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { @@ -1238,6 +1248,9 @@ ChangeStatus Attributor::cleanupIR() { NumFnDeleted += ToBeDeletedFunctions.size(); + LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted + << " functions after manifest.\n"); + #ifdef EXPENSIVE_CHECKS for (Function *F : Functions) { if (ToBeDeletedFunctions.count(F)) @@ -1250,6 +1263,7 @@ ChangeStatus Attributor::cleanupIR() { } ChangeStatus Attributor::run() { + SeedingPeriod = false; runTillFixpoint(); ChangeStatus ManifestChange = manifestAttributes(); ChangeStatus CleanupChange = cleanupIR(); @@ -1446,6 +1460,12 @@ bool Attributor::registerFunctionSignatureRewrite( return true; } +bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) { + if (SeedAllowList.size() == 0) + return true; + return std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName()); +} + ChangeStatus Attributor::rewriteFunctionSignatures( SmallPtrSetImpl &ModifiedFns) { ChangeStatus Changed = ChangeStatus::UNCHANGED; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index dbc1541b9950e..7e9fd61eeb41e 100644 --- 
a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -4670,6 +4670,30 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { AAValueSimplifyCallSiteArgument(const IRPosition &IRP, Attributor &A) : AAValueSimplifyFloating(IRP, A) {} + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (SimplifiedAssociatedValue.hasValue() && + !SimplifiedAssociatedValue.getValue()) + return Changed; + + Value &V = getAssociatedValue(); + auto *C = SimplifiedAssociatedValue.hasValue() + ? dyn_cast(SimplifiedAssociatedValue.getValue()) + : UndefValue::get(V.getType()); + if (C) { + Use &U = cast(&getAnchorValue())->getArgOperandUse(getArgNo()); + // We can replace the AssociatedValue with the constant. + if (&V != C && V.getType() == C->getType()) { + if (A.changeUseAfterManifest(U, *C)) + Changed = ChangeStatus::CHANGED; + } + } + + return Changed | AAValueSimplify::manifest(A); + } + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(value_simplify) } diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 09a18b7b5c237..af5f72f6b6365 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -25,12 +25,14 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -287,7 +289,7 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { for (Argument &Arg : Fn.args()) 
{ if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && - !Arg.hasPassPointeeByValueAttr()) { + !Arg.hasPassPointeeByValueCopyAttr()) { if (Arg.isUsedByMetadata()) { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); Changed = true; @@ -967,16 +969,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { for (unsigned Ri = 0; Ri != RetCount; ++Ri) if (NewRetIdxs[Ri] != -1) { Value *V; + IRBuilder IRB(InsertPt); if (RetTypes.size() > 1) // We are still returning a struct, so extract the value from our // return value - V = ExtractValueInst::Create(NewCB, NewRetIdxs[Ri], "newret", - InsertPt); + V = IRB.CreateExtractValue(NewCB, NewRetIdxs[Ri], "newret"); else // We are now returning a single element, so just insert that V = NewCB; // Insert the value at the old position - RetVal = InsertValueInst::Create(RetVal, V, Ri, "oldret", InsertPt); + RetVal = IRB.CreateInsertValue(RetVal, V, Ri, "oldret"); } // Now, replace all uses of the old call instruction with the return // struct we built @@ -1019,6 +1021,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { if (F->getReturnType() != NF->getReturnType()) for (BasicBlock &BB : *NF) if (ReturnInst *RI = dyn_cast(BB.getTerminator())) { + IRBuilder IRB(RI); Value *RetVal = nullptr; if (!NFTy->getReturnType()->isVoidTy()) { @@ -1033,14 +1036,14 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { RetVal = UndefValue::get(NRetTy); for (unsigned RetI = 0; RetI != RetCount; ++RetI) if (NewRetIdxs[RetI] != -1) { - ExtractValueInst *EV = - ExtractValueInst::Create(OldRet, RetI, "oldret", RI); + Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret"); + if (RetTypes.size() > 1) { // We're still returning a struct, so reinsert the value into // our new return value at the new index - RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[RetI], - "newret", RI); + RetVal = IRB.CreateInsertValue(RetVal, EV, NewRetIdxs[RetI], + "newret"); } else { 
// We are now only returning a simple value, so just return the // extracted value. diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index fac52cb3d8459..8eef7e3e7e999 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -735,6 +735,9 @@ static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, /// replace the call with. Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL) { + // Delay lowering if the resolution is currently unknown. + if (TIL.TheKind == TypeTestResolution::Unknown) + return nullptr; if (TIL.TheKind == TypeTestResolution::Unsat) return ConstantInt::getFalse(M.getContext()); @@ -1036,14 +1039,18 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { report_fatal_error("Second argument of llvm.type.test must be metadata"); auto TypeIdStr = dyn_cast(TypeIdMDVal->getMetadata()); + // If this is a local unpromoted type, which doesn't have a metadata string, + // treat as Unknown and delay lowering, so that we can still utilize it for + // later optimizations. 
if (!TypeIdStr) - report_fatal_error( - "Second argument of llvm.type.test must be a metadata string"); + return; TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } // ThinLTO backend: the function F has a jump table entry; update this module @@ -1166,8 +1173,10 @@ void LowerTypeTestsModule::lowerTypeTestCalls( for (CallInst *CI : TIUI.CallSites) { ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } } } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 0b2e4f24bd178..f664a24173747 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -39,6 +39,8 @@ static cl::opt DisableOpenMPOptimizations( static cl::opt PrintICVValues("openmp-print-icv-values", cl::init(false), cl::Hidden); +static cl::opt PrintOpenMPKernels("openmp-print-gpu-kernels", + cl::init(false), cl::Hidden); STATISTIC(NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated"); @@ -48,11 +50,38 @@ STATISTIC(NumOpenMPRuntimeFunctionsIdentified, "Number of OpenMP runtime functions identified"); STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified"); +STATISTIC(NumOpenMPTargetRegionKernels, + "Number of OpenMP target region entry points (=kernels) identified"); +STATISTIC( + NumOpenMPParallelRegionsReplacedInGPUStateMachine, + "Number of OpenMP parallel regions replaced with ID in GPU state machines"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif +/// Apply \p CB to all uses of \p F. 
If \p LookThroughConstantExprUses is +/// true, constant expression users are not given to \p CB but their uses are +/// traversed transitively. +template +static void foreachUse(Function &F, CBTy CB, + bool LookThroughConstantExprUses = true) { + SmallVector Worklist(make_pointer_range(F.uses())); + + for (unsigned idx = 0; idx < Worklist.size(); ++idx) { + Use &U = *Worklist[idx]; + + // Allow use in constant bitcasts and simply look through them. + if (LookThroughConstantExprUses && isa(U.getUser())) { + for (Use &CEU : cast(U.getUser())->uses()) + Worklist.push_back(&CEU); + continue; + } + + CB(U); + } +} + /// Helper struct to store tracked ICV values at specif instructions. struct ICVValue { Instruction *Inst; @@ -98,10 +127,12 @@ struct AAICVTracker; /// Attributor runs. struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, - BumpPtrAllocator &Allocator, SetVector *CGSCC, - SmallPtrSetImpl &ModuleSlice) - : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), - OMPBuilder(M) { + BumpPtrAllocator &Allocator, SetVector &CGSCC, + SmallPtrSetImpl &Kernels) + : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), + Kernels(Kernels) { + initializeModuleSlice(CGSCC); + OMPBuilder.initialize(); initializeRuntimeFunctions(); initializeInternalControlVars(); @@ -161,6 +192,9 @@ struct OMPInformationCache : public InformationCache { /// Clear UsesMap for runtime function. void clearUsesMap() { UsesMap.clear(); } + /// Boolean conversion that is true if the runtime function was found. + operator bool() const { return Declaration; } + /// Return the vector of uses in function \p F. UseVector &getOrCreateUseVector(Function *F) { std::shared_ptr &UV = UsesMap[F]; @@ -188,20 +222,20 @@ struct OMPInformationCache : public InformationCache { /// Run the callback \p CB on each use and forget the use if the result is /// true. 
The callback will be fed the function in which the use was /// encountered as second argument. - void foreachUse(function_ref CB) { - for (auto &It : UsesMap) - foreachUse(CB, It.first, It.second.get()); + void foreachUse(SmallVectorImpl &SCC, + function_ref CB) { + for (Function *F : SCC) + foreachUse(CB, F); } /// Run the callback \p CB on each use within the function \p F and forget /// the use if the result is true. - void foreachUse(function_ref CB, Function *F, - UseVector *Uses = nullptr) { + void foreachUse(function_ref CB, Function *F) { SmallVector ToBeDeleted; ToBeDeleted.clear(); unsigned Idx = 0; - UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); + UseVector &UV = getOrCreateUseVector(F); for (Use *U : UV) { if (CB(*U, *F)) @@ -210,7 +244,7 @@ struct OMPInformationCache : public InformationCache { } // Remove the to-be-deleted indices in reverse order as prior - // modifcations will not modify the smaller indices. + // modifications will not modify the smaller indices. while (!ToBeDeleted.empty()) { unsigned Idx = ToBeDeleted.pop_back_val(); UV[Idx] = UV.back(); @@ -224,8 +258,45 @@ DenseMap> UsesMap; }; + /// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains + /// (a subset of) all functions that we can look at during this SCC traversal. + /// This includes functions (transitively) called from the SCC and the + /// (transitive) callers of SCC functions. We also can look at a function if + /// there is a "reference edge", i.e., if the function somehow uses (!=calls) + /// a function in the SCC or a caller of a function in the SCC.
+ void initializeModuleSlice(SetVector &SCC) { + ModuleSlice.insert(SCC.begin(), SCC.end()); + + SmallPtrSet Seen; + SmallVector Worklist(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + for (Instruction &I : instructions(*F)) + if (auto *CB = dyn_cast(&I)) + if (Function *Callee = CB->getCalledFunction()) + if (Seen.insert(Callee).second) + Worklist.push_back(Callee); + } + + Seen.clear(); + Worklist.append(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + // Traverse all transitive uses. + foreachUse(*F, [&](Use &U) { + if (auto *UsrI = dyn_cast(U.getUser())) + if (Seen.insert(UsrI->getFunction()).second) + Worklist.push_back(UsrI->getFunction()); + }); + } + } + /// The slice of the module we are allowed to look at. - SmallPtrSetImpl &ModuleSlice; + SmallPtrSet ModuleSlice; /// An OpenMP-IR-Builder instance OpenMPIRBuilder OMPBuilder; @@ -304,7 +375,7 @@ struct OMPInformationCache : public InformationCache { return true; } - // Helper to collect all uses of the decleration in the UsesMap. + // Helper to collect all uses of the declaration in the UsesMap. unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { unsigned NumUses = 0; if (!RFI.Declaration) @@ -396,6 +467,9 @@ struct OMPInformationCache : public InformationCache { // TODO: We should attach the attributes defined in OMPKinds.def. } + + /// Collection of known kernels (\see Kernel) in the module. + SmallPtrSetImpl &Kernels; }; struct OpenMPOpt { @@ -411,32 +485,21 @@ struct OpenMPOpt { /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. bool run() { + if (SCC.empty()) + return false; + bool Changed = false; LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"); - /// Print initial ICV values for testing. 
- /// FIXME: This should be done from the Attributor once it is added. - if (PrintICVValues) { - InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; - - for (Function *F : OMPInfoCache.ModuleSlice) { - for (auto ICV : ICVs) { - auto ICVInfo = OMPInfoCache.ICVs[ICV]; - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) - << " Value: " - << (ICVInfo.InitValue - ? ICVInfo.InitValue->getValue().toString(10, true) - : "IMPLEMENTATION_DEFINED"); - }; + if (PrintICVValues) + printICVs(); + if (PrintOpenMPKernels) + printKernels(); - emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); - } - } - } + Changed |= rewriteDeviceCodeStateMachine(); Changed |= runAttributor(); @@ -449,6 +512,42 @@ struct OpenMPOpt { return Changed; } + /// Print initial ICV values for testing. + /// FIXME: This should be done from the Attributor once it is added. + void printICVs() const { + InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; + + for (Function *F : OMPInfoCache.ModuleSlice) { + for (auto ICV : ICVs) { + auto ICVInfo = OMPInfoCache.ICVs[ICV]; + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) + << " Value: " + << (ICVInfo.InitValue + ? ICVInfo.InitValue->getValue().toString(10, true) + : "IMPLEMENTATION_DEFINED"); + }; + + emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); + } + } + } + + /// Print OpenMP GPU kernels for testing. + void printKernels() const { + for (Function *F : SCC) { + if (!OMPInfoCache.Kernels.count(F)) + continue; + + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP GPU kernel " + << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; + }; + + emitRemarkOnFunction(F, "OpenMPGPU", Remark); + } + } + /// Return the call if \p U is a callee use in a regular call. If \p RFI is /// given it has to be the callee or a nullptr is returned. 
static CallInst *getCallIfRegularCall( @@ -514,12 +613,12 @@ struct OpenMPOpt { return true; }; - RFI.foreachUse(DeleteCallCB); + RFI.foreachUse(SCC, DeleteCallCB); return Changed; } - /// Try to eliminiate runtime calls by reusing existing ones. + /// Try to eliminate runtime calls by reusing existing ones. bool deduplicateRuntimeCalls() { bool Changed = false; @@ -599,7 +698,7 @@ struct OpenMPOpt { /* GlobalOnly */ true, SingleChoice); return false; }; - RFI.foreachUse(CombineIdentStruct); + RFI.foreachUse(SCC, CombineIdentStruct); if (!Ident || !SingleChoice) { // The IRBuilder uses the insertion block to get to the module, this is @@ -615,7 +714,7 @@ struct OpenMPOpt { return Ident; } - /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or + /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or /// \p ReplVal if given. bool deduplicateRuntimeCalls(Function &F, OMPInformationCache::RuntimeFunctionInfo &RFI, @@ -699,7 +798,7 @@ struct OpenMPOpt { Changed = true; return true; }; - RFI.foreachUse(ReplaceAndDeleteCB); + RFI.foreachUse(SCC, ReplaceAndDeleteCB); return Changed; } @@ -742,7 +841,7 @@ struct OpenMPOpt { OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; - GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { + GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) AddUserArgs(*CI); return false; @@ -755,6 +854,31 @@ struct OpenMPOpt { AddUserArgs(*GTIdArgs[u]); } + /// Kernel (=GPU) optimizations and utility functions + /// + ///{{ + + /// Check if \p F is a kernel, hence entry point for target offloading. + bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } + + /// Cache to remember the unique kernel for a function. + DenseMap> UniqueKernelMap; + + /// Find the unique kernel that will execute \p F, if any. 
+ Kernel getUniqueKernelFor(Function &F); + + /// Find the unique kernel that will execute \p I, if any. + Kernel getUniqueKernelFor(Instruction &I) { + return getUniqueKernelFor(*I.getFunction()); + } + + /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in + /// the cases we can avoid taking the address of a function. + bool rewriteDeviceCodeStateMachine(); + + /// + ///}} + /// Emit a remark generically /// /// This template function can be used to generically emit a remark. The @@ -769,7 +893,7 @@ struct OpenMPOpt { template > void emitRemark(Instruction *Inst, StringRef RemarkName, - RemarkCallBack &&RemarkCB) { + RemarkCallBack &&RemarkCB) const { Function *F = Inst->getParent()->getParent(); auto &ORE = OREGetter(F); @@ -779,9 +903,10 @@ struct OpenMPOpt { /// Emit a remark on a function. Since only OptimizationRemark is supporting /// this, it can't be made generic. - void emitRemarkOnFunction( - Function *F, StringRef RemarkName, - function_ref &&RemarkCB) { + void + emitRemarkOnFunction(Function *F, StringRef RemarkName, + function_ref + &&RemarkCB) const { auto &ORE = OREGetter(F); ORE.emit([&]() { @@ -789,7 +914,7 @@ struct OpenMPOpt { }); } - /// The underyling module. + /// The underlying module. Module &M; /// The SCC we are operating on. @@ -835,6 +960,198 @@ struct OpenMPOpt { } }; +Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { + if (!OMPInfoCache.ModuleSlice.count(&F)) + return nullptr; + + // Use a scope to keep the lifetime of the CachedKernel short. + { + Optional &CachedKernel = UniqueKernelMap[&F]; + if (CachedKernel) + return *CachedKernel; + + // TODO: We should use an AA to create an (optimistic and callback + // call-aware) call graph. For now we stick to simple patterns that + // are less powerful, basically the worst fixpoint. 
+ if (isKernel(F)) { + CachedKernel = Kernel(&F); + return *CachedKernel; + } + + CachedKernel = nullptr; + if (!F.hasLocalLinkage()) + return nullptr; + } + + auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { + if (auto *Cmp = dyn_cast(U.getUser())) { + // Allow use in equality comparisons. + if (Cmp->isEquality()) + return getUniqueKernelFor(*Cmp); + return nullptr; + } + if (auto *CB = dyn_cast(U.getUser())) { + // Allow direct calls. + if (CB->isCallee(&U)) + return getUniqueKernelFor(*CB); + // Allow the use in __kmpc_kernel_prepare_parallel calls. + if (Function *Callee = CB->getCalledFunction()) + if (Callee->getName() == "__kmpc_kernel_prepare_parallel") + return getUniqueKernelFor(*CB); + return nullptr; + } + // Disallow every other use. + return nullptr; + }; + + // TODO: In the future we want to track more than just a unique kernel. + SmallPtrSet PotentialKernels; + foreachUse(F, [&](const Use &U) { + PotentialKernels.insert(GetUniqueKernelForUse(U)); + }); + + Kernel K = nullptr; + if (PotentialKernels.size() == 1) + K = *PotentialKernels.begin(); + + // Cache the result. + UniqueKernelMap[&F] = K; + + return K; +} + +bool OpenMPOpt::rewriteDeviceCodeStateMachine() { + OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel]; + + bool Changed = false; + if (!KernelPrepareParallelRFI) + return Changed; + + for (Function *F : SCC) { + + // Check if the function is used in a __kmpc_kernel_prepare_parallel call at + all.
+ bool UnknownUse = false; + bool KernelPrepareUse = false; + unsigned NumDirectCalls = 0; + + SmallVector ToBeReplacedStateMachineUses; + foreachUse(*F, [&](Use &U) { + if (auto *CB = dyn_cast(U.getUser())) + if (CB->isCallee(&U)) { + ++NumDirectCalls; + return; + } + + if (isa(U.getUser())) { + ToBeReplacedStateMachineUses.push_back(&U); + return; + } + if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall( + *U.getUser(), &KernelPrepareParallelRFI)) { + KernelPrepareUse = true; + ToBeReplacedStateMachineUses.push_back(&U); + return; + } + UnknownUse = true; + }); + + // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel + // use. + if (!KernelPrepareUse) + continue; + + { + auto Remark = [&](OptimizationRemark OR) { + return OR << "Found a parallel region that is called in a target " + "region but not part of a combined target construct nor " + "nesed inside a target construct without intermediate " + "code. This can lead to excessive register usage for " + "unrelated target regions in the same translation unit " + "due to spurious call edges assumed by ptxas."; + }; + emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); + } + + // If this ever hits, we should investigate. + // TODO: Checking the number of uses is not a necessary restriction and + // should be lifted. + if (UnknownUse || NumDirectCalls != 1 || + ToBeReplacedStateMachineUses.size() != 2) { + { + auto Remark = [&](OptimizationRemark OR) { + return OR << "Parallel region is used in " + << (UnknownUse ? "unknown" : "unexpected") + << " ways; will not attempt to rewrite the state machine."; + }; + emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); + } + continue; + } + + // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give + // up if the function is not called from a unique kernel. 
+ Kernel K = getUniqueKernelFor(*F); + if (!K) { + { + auto Remark = [&](OptimizationRemark OR) { + return OR << "Parallel region is not known to be called from a " + "unique single target region, maybe the surrounding " + "function has external linkage?; will not attempt to " + "rewrite the state machine use."; + }; + emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl", + Remark); + } + continue; + } + + // We now know F is a parallel body function called only from the kernel K. + // We also identified the state machine uses in which we replace the + // function pointer by a new global symbol for identification purposes. This + // ensures only direct calls to the function are left. + + { + auto RemarkParalleRegion = [&](OptimizationRemark OR) { + return OR << "Specialize parallel region that is only reached from a " + "single target region to avoid spurious call edges and " + "excessive register usage in other target regions. " + "(parallel region ID: " + << ore::NV("OpenMPParallelRegion", F->getName()) + << ", kernel ID: " + << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; + }; + emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", + RemarkParalleRegion); + auto RemarkKernel = [&](OptimizationRemark OR) { + return OR << "Target region containing the parallel region that is " + "specialized. 
(parallel region ID: " + << ore::NV("OpenMPParallelRegion", F->getName()) + << ", kernel ID: " + << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; + }; + emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel); + } + + Module &M = *F->getParent(); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + + auto *ID = new GlobalVariable( + M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, + UndefValue::get(Int8Ty), F->getName() + ".ID"); + + for (Use *U : ToBeReplacedStateMachineUses) + U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); + + ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; + + Changed = true; + } + + return Changed; +} + /// Abstract Attribute for tracking ICV values. struct AAICVTracker : public StateWrapper { using Base = StateWrapper; @@ -856,6 +1173,14 @@ struct AAICVTracker : public StateWrapper { /// See AbstractAttribute::getName() const std::string getName() const override { return "AAICVTracker"; } + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AAICVTracker + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + static const char ID; }; @@ -903,7 +1228,7 @@ struct AAICVTrackerFunction : public AAICVTracker { return true; }; - GetterRFI.foreachUse(ReplaceAndDeleteCB); + GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope()); return Changed; } @@ -1013,12 +1338,9 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); - SmallPtrSet ModuleSlice; SmallVector SCC; - for (LazyCallGraph::Node &N : C) { + for (LazyCallGraph::Node &N : C) SCC.push_back(&N.getFunction()); - ModuleSlice.insert(SCC.back()); - } if (SCC.empty()) return PreservedAnalyses::all(); @@ -1038,14 +1360,16 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, SetVector Functions(SCC.begin(), 
SCC.end()); BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, ModuleSlice); + /*CGSCC*/ Functions, OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); // TODO: Compute the module slice we are allowed to look at. OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(); - (void)Changed; + if (Changed) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } @@ -1076,14 +1400,11 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { if (DisableOpenMPOptimizations || skipSCC(CGSCC)) return false; - SmallPtrSet ModuleSlice; SmallVector SCC; for (CallGraphNode *CGN : CGSCC) if (Function *Fn = CGN->getFunction()) - if (!Fn->isDeclaration()) { + if (!Fn->isDeclaration()) SCC.push_back(Fn); - ModuleSlice.insert(Fn); - } if (SCC.empty()) return false; @@ -1103,9 +1424,9 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { AnalysisGetter AG; SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; - OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, - Allocator, - /*CGSCC*/ &Functions, ModuleSlice); + OMPInformationCache InfoCache( + *(Functions.back()->getParent()), AG, Allocator, + /*CGSCC*/ Functions, OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); @@ -1119,14 +1440,53 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { } // end anonymous namespace +void OpenMPInModule::identifyKernels(Module &M) { + + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + if (!MD) + return; + + for (auto *Op : MD->operands()) { + if (Op->getNumOperands() < 2) + continue; + MDString *KindID = dyn_cast(Op->getOperand(1)); + if (!KindID || KindID->getString() != "kernel") + continue; + + Function *KernelFn = + mdconst::dyn_extract_or_null(Op->getOperand(0)); + if (!KernelFn) + continue; + + ++NumOpenMPTargetRegionKernels; + + Kernels.insert(KernelFn); + } +} + 
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { if (OMPInModule.isKnown()) return OMPInModule; + // MSVC doesn't like long if-else chains for some reason and instead just + // issues an error. Work around it.. + do { #define OMP_RTL(_Enum, _Name, ...) \ - if (M.getFunction(_Name)) \ - return OMPInModule = true; + if (M.getFunction(_Name)) { \ + OMPInModule = true; \ + break; \ + } #include "llvm/Frontend/OpenMP/OMPKinds.def" + } while (false); + + // Identify kernels once. TODO: We should split the OMPInformationCache into a + // module and an SCC part. The kernel information, among other things, could + // go into the module part. + if (OMPInModule.isKnown() && OMPInModule) { + OMPInModule.identifyKernels(M); + return true; + } + return OMPInModule = false; } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index b65eb469a4923..a109d69d0f5e9 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -294,6 +294,13 @@ void PassManagerBuilder::populateFunctionPassManager( if (LibraryInfo) FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + // The backends do not handle matrix intrinsics currently. + // Make sure they are also lowered in O0. + // FIXME: A lightweight version of the pass should run in the backend + // pipeline on demand. + if (EnableMatrix && OptLevel == 0) + FPM.add(createLowerMatrixIntrinsicsPass()); + if (OptLevel == 0) return; addInitialAliasAnalysisPasses(FPM); @@ -515,6 +522,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. 
@@ -548,9 +556,11 @@ void PassManagerBuilder::populateModulePassManager( // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. - if (PerformThinLTO) + if (PerformThinLTO) { MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); + } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation @@ -774,7 +784,11 @@ void PassManagerBuilder::populateModulePassManager( // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + MPM.add(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .sinkCommonInsts(true))); if (SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. @@ -1079,8 +1093,8 @@ void PassManagerBuilder::populateThinLTOPassManager( PM.add(createVerifierPass()); if (ImportSummary) { - // These passes import type identifier resolutions for whole-program - // devirtualization and CFI. They must run early because other passes may + // This pass imports type identifier resolutions for whole-program + // devirtualization and CFI. It must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. 
// @@ -1128,6 +1142,9 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). + PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 7bea1503ea630..5a25f9857665c 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -540,7 +540,9 @@ struct DevirtModule { bool areRemarksEnabled(); - void scanTypeTestUsers(Function *TypeTestFunc); + void + scanTypeTestUsers(Function *TypeTestFunc, + DenseMap> &TypeIdMap); void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); void buildTypeIdentifierMap( @@ -1705,7 +1707,9 @@ bool DevirtModule::areRemarksEnabled() { return false; } -void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { +void DevirtModule::scanTypeTestUsers( + Function *TypeTestFunc, + DenseMap> &TypeIdMap) { // Find all virtual calls via a virtual table pointer %p under an assumption // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p // points to a member of the type identifier %md. Group calls by (type ID, @@ -1724,22 +1728,59 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { auto &DT = LookupDomTree(*CI->getFunction()); findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + Metadata *TypeId = + cast(CI->getArgOperand(1))->getMetadata(); // If we found any, add them to CallSlots. 
if (!Assumes.empty()) { - Metadata *TypeId = - cast(CI->getArgOperand(1))->getMetadata(); Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); for (DevirtCallSite Call : DevirtCalls) CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr); } - // We no longer need the assumes or the type test. - for (auto Assume : Assumes) - Assume->eraseFromParent(); - // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we - // may use the vtable argument later. - if (CI->use_empty()) - CI->eraseFromParent(); + auto RemoveTypeTestAssumes = [&]() { + // We no longer need the assumes or the type test. + for (auto Assume : Assumes) + Assume->eraseFromParent(); + // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we + // may use the vtable argument later. + if (CI->use_empty()) + CI->eraseFromParent(); + }; + + // At this point we could remove all type test assume sequences, as they + // were originally inserted for WPD. However, we can keep these in the + // code stream for later analysis (e.g. to help drive more efficient ICP + // sequences). They will eventually be removed by a second LowerTypeTests + // invocation that cleans them up. In order to do this correctly, the first + // LowerTypeTests invocation needs to know that they have "Unknown" type + // test resolution, so that they aren't treated as Unsat and lowered to + // False, which will break any uses on assumes. Below we remove any type + // test assumes that will not be treated as Unknown by LTT. + + // The type test assumes will be treated by LTT as Unsat if the type id is + // not used on a global (in which case it has no entry in the TypeIdMap). + if (!TypeIdMap.count(TypeId)) + RemoveTypeTestAssumes(); + + // For ThinLTO importing, we need to remove the type test assumes if this is + // an MDString type id without a corresponding TypeIdSummary. Any + // non-MDString type ids are ignored and treated as Unknown by LTT, so their + // type test assumes can be kept. 
If the MDString type id is missing a + // TypeIdSummary (e.g. because there was no use on a vcall, preventing the + // exporting phase of WPD from analyzing it), then it would be treated as + // Unsat by LTT and we need to remove its type test assumes here. If not + // used on a vcall we don't need them for later optimization use in any + // case. + else if (ImportSummary && isa(TypeId)) { + const TypeIdSummary *TidSummary = + ImportSummary->getTypeIdSummary(cast(TypeId)->getString()); + if (!TidSummary) + RemoveTypeTestAssumes(); + else + // If one was created it should not be Unsat, because if we reached here + // the type id was used on a global. + assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat); + } } } @@ -1931,8 +1972,13 @@ bool DevirtModule::run() { (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) return false; + // Rebuild type metadata into a map for easy lookup. + std::vector Bits; + DenseMap> TypeIdMap; + buildTypeIdentifierMap(Bits, TypeIdMap); + if (TypeTestFunc && AssumeFunc) - scanTypeTestUsers(TypeTestFunc); + scanTypeTestUsers(TypeTestFunc, TypeIdMap); if (TypeCheckedLoadFunc) scanTypeCheckedLoadUsers(TypeCheckedLoadFunc); @@ -1954,10 +2000,6 @@ bool DevirtModule::run() { return true; } - // Rebuild type metadata into a map for easy lookup. - std::vector Bits; - DenseMap> TypeIdMap; - buildTypeIdentifierMap(Bits, TypeIdMap); if (TypeIdMap.empty()) return true; @@ -2014,14 +2056,22 @@ bool DevirtModule::run() { // function implementation at offset S.first.ByteOffset, and add to // TargetsForSlot. 
std::vector TargetsForSlot; - if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID], + WholeProgramDevirtResolution *Res = nullptr; + const std::set &TypeMemberInfos = TypeIdMap[S.first.TypeID]; + if (ExportSummary && isa(S.first.TypeID) && + TypeMemberInfos.size()) + // For any type id used on a global's type metadata, create the type id + // summary resolution regardless of whether we can devirtualize, so that + // lower type tests knows the type id is not Unsat. If it was not used on + // a global's type metadata, the TypeIdMap entry set will be empty, and + // we don't want to create an entry (with the default Unknown type + // resolution), which can prevent detection of the Unsat. + Res = &ExportSummary + ->getOrInsertTypeIdSummary( + cast(S.first.TypeID)->getString()) + .WPDRes[S.first.ByteOffset]; + if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = nullptr; - if (ExportSummary && isa(S.first.TypeID)) - Res = &ExportSummary - ->getOrInsertTypeIdSummary( - cast(S.first.TypeID)->getString()) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= @@ -2135,11 +2185,14 @@ void DevirtIndex::run() { std::vector TargetsForSlot; auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); assert(TidSummary); + // Create the type id summary resolution regardlness of whether we can + // devirtualize, so that lower type tests knows the type id is used on + // a global and not Unsat. 
+ WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = - &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, DevirtTargets)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index d3c718a919c0a..1304d46fdef4f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1148,11 +1148,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, assert((IsAnd || Logic.getOpcode() == Instruction::Or) && "Wrong logic op"); // Match an equality compare with a non-poison constant as Cmp0. + // Also, give up if the compare can be constant-folded to avoid looping. 
ICmpInst::Predicate Pred0; Value *X; Constant *C; if (!match(Cmp0, m_ICmp(Pred0, m_Value(X), m_Constant(C))) || - !isGuaranteedNotToBeUndefOrPoison(C)) + !isGuaranteedNotToBeUndefOrPoison(C) || isa(X)) return nullptr; if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) || (!IsAnd && Pred0 != ICmpInst::ICMP_NE)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index dd2f59be08e92..ca51f37af4d93 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -440,6 +440,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitAtomicRMWInst(AtomicRMWInst &SI); + Instruction *visitUnconditionalBranchInst(BranchInst &BI); Instruction *visitBranchInst(BranchInst &BI); Instruction *visitFenceInst(FenceInst &FI); Instruction *visitSwitchInst(SwitchInst &SI); @@ -652,7 +653,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); BB->getInstList().insert(Old.getIterator(), New); // Insert inst - Worklist.push(New); + Worklist.add(New); return New; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 7203850ad24d6..dad2f23120bdb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1425,34 +1425,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(Val)) return eraseInstFromFunction(SI); - auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) { - return isa(BBI) || - (isa(BBI) && BBI->getType()->isPointerTy()); - }; - - // If this store is the second-to-last instruction in the basic block - // (excluding debug info and bitcasts of pointers) and if the block 
ends with - // an unconditional branch, try to move the store to the successor block. - BBI = SI.getIterator(); - do { - ++BBI; - } while (IsNoopInstrForStoreMerging(BBI)); - - if (BranchInst *BI = dyn_cast(BBI)) - if (BI->isUnconditional()) - if (mergeStoreIntoSuccessor(SI)) { - // Okay, we've managed to do that. Now, let's see if now-second-to-last - // instruction is also a store that we can also sink. - BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); - do { - if (BBI != FirstInstr) - --BBI; - } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI)); - if (StoreInst *PrevStore = dyn_cast(BBI)) - Worklist.add(PrevStore); - return nullptr; - } - return nullptr; } @@ -1462,8 +1434,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// *P = v1; if () { *P = v2; } /// into a phi node with a store in the successor. bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) { - assert(SI.isUnordered() && - "This code has not been audited for volatile or ordered store case."); + if (!SI.isUnordered()) + return false; // This code has not been audited for volatile/ordered case. // Check if the successor block has exactly 2 incoming edges. 
BasicBlock *StoreBB = SI.getParent(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2965103d40295..f039989c004ce 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -376,6 +376,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *And = Builder.CreateAnd(X, Y, "mulbool"); return CastInst::Create(Instruction::ZExt, And, I.getType()); } + // (sext bool X) * (zext bool Y) --> sext (and X, Y) + // (zext bool X) * (sext bool Y) --> sext (and X, Y) + // Note: -1 * 1 == 1 * -1 == -1 + if (((match(Op0, m_SExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) || + (match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) && + X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() && + (Op0->hasOneUse() || Op1->hasOneUse())) { + Value *And = Builder.CreateAnd(X, Y, "mulbool"); + return CastInst::Create(Instruction::SExt, And, I.getType()); + } // (bool X) * Y --> X ? Y : 0 // Y * (bool X) --> X ? 
Y : 0 @@ -1172,6 +1182,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return BO; } + if (match(Op1, m_NegatedPower2())) { + // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) -> + // -> -(X udiv (1 << C)) -> -(X u>> C) + return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst( + Op0, ConstantExpr::getNeg(cast(Op1)), I, *this))); + } + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 2b2f2e1b9470f..dfaad1b5f8c3a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -1129,6 +1129,78 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { return replaceInstUsesWith(FirstPhi, Undef); } +static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, + const DominatorTree &DT) { + // Simplify the following patterns: + // if (cond) + // / \ + // ... ... + // \ / + // phi [true] [false] + if (!PN.getType()->isIntegerTy(1)) + return nullptr; + + if (PN.getNumOperands() != 2) + return nullptr; + + // Make sure all inputs are constants. + if (!all_of(PN.operands(), [](Value *V) { return isa(V); })) + return nullptr; + + BasicBlock *BB = PN.getParent(); + // Do not bother with unreachable instructions. + if (!DT.isReachableFromEntry(BB)) + return nullptr; + + // Same inputs. + if (PN.getOperand(0) == PN.getOperand(1)) + return PN.getOperand(0); + + BasicBlock *TruePred = nullptr, *FalsePred = nullptr; + for (auto *Pred : predecessors(BB)) { + auto *Input = cast(PN.getIncomingValueForBlock(Pred)); + if (Input->isAllOnesValue()) + TruePred = Pred; + else + FalsePred = Pred; + } + assert(TruePred && FalsePred && "Must be!"); + + // Check which edge of the dominator dominates the true input. 
If it is the + // false edge, we should invert the condition. + auto *IDom = DT.getNode(BB)->getIDom()->getBlock(); + auto *BI = dyn_cast(IDom->getTerminator()); + if (!BI || BI->isUnconditional()) + return nullptr; + + // Check that edges outgoing from the idom's terminators dominate respective + // inputs of the Phi. + BasicBlockEdge TrueOutEdge(IDom, BI->getSuccessor(0)); + BasicBlockEdge FalseOutEdge(IDom, BI->getSuccessor(1)); + + BasicBlockEdge TrueIncEdge(TruePred, BB); + BasicBlockEdge FalseIncEdge(FalsePred, BB); + + auto *Cond = BI->getCondition(); + if (DT.dominates(TrueOutEdge, TrueIncEdge) && + DT.dominates(FalseOutEdge, FalseIncEdge)) + // This Phi is actually equivalent to branching condition of IDom. + return Cond; + else if (DT.dominates(TrueOutEdge, FalseIncEdge) && + DT.dominates(FalseOutEdge, TrueIncEdge)) { + // This Phi is actually opposite to branching condition of IDom. We invert + // the condition that will potentially open up some opportunities for + // sinking. + auto InsertPt = BB->getFirstInsertionPt(); + if (InsertPt != BB->end()) { + Self.Builder.SetInsertPoint(&*InsertPt); + return Self.Builder.CreateNot(Cond); + } + } + + return nullptr; +} + // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { @@ -1276,5 +1348,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; + // Ultimately, try to replace this Phi with a dominating condition. 
+ if (auto *V = SimplifyUsingControlFlow(*this, PN, DT)) + return replaceInstUsesWith(PN, V); + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 233fb3878ba72..db27711f29b17 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2443,11 +2443,11 @@ Instruction *InstCombiner::foldVectorSelect(SelectInst &Sel) { return nullptr; } -static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, - InstCombiner::BuilderTy &Builder) { +static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB, + const DominatorTree &DT, + InstCombiner::BuilderTy &Builder) { // Find the block's immediate dominator that ends with a conditional branch // that matches select's condition (maybe inverted). - BasicBlock *BB = Sel.getParent(); auto *IDomNode = DT[BB]->getIDom(); if (!IDomNode) return nullptr; @@ -2469,6 +2469,10 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, } else return nullptr; + // Make sure the branches are actually different. + if (TrueSucc == FalseSucc) + return nullptr; + // We want to replace select %cond, %a, %b with a phi that takes value %a // for all incoming edges that are dominated by condition `%cond == true`, // and value %b for edges dominated by condition `%cond == false`. If %a @@ -2500,6 +2504,21 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, return PN; } +static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, + InstCombiner::BuilderTy &Builder) { + // Try to replace this select with Phi in one of these blocks. 
+ SmallSetVector CandidateBlocks; + CandidateBlocks.insert(Sel.getParent()); + for (Value *V : Sel.operands()) + if (auto *I = dyn_cast(V)) + CandidateBlocks.insert(I->getParent()); + + for (BasicBlock *BB : CandidateBlocks) + if (auto *PN = foldSelectToPhiImpl(Sel, BB, DT, Builder)) + return PN; + return nullptr; +} + Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index e810b3de25bc8..8eac8637cb9e7 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2739,10 +2739,38 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { return nullptr; } +Instruction *InstCombiner::visitUnconditionalBranchInst(BranchInst &BI) { + assert(BI.isUnconditional() && "Only for unconditional branches."); + + // If this store is the second-to-last instruction in the basic block + // (excluding debug info and bitcasts of pointers) and if the block ends with + // an unconditional branch, try to move the store to the successor block. + + auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { + auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) { + return isa(BBI) || + (isa(BBI) && BBI->getType()->isPointerTy()); + }; + + BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); + do { + if (BBI != FirstInstr) + --BBI; + } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI)); + + return dyn_cast(BBI); + }; + + if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) + if (mergeStoreIntoSuccessor(*SI)) + return &BI; + + return nullptr; +} + Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { - // Nothing to do about unconditional branches. 
if (BI.isUnconditional()) - return nullptr; + return visitUnconditionalBranchInst(BI); // Change br (not X), label True, label False to: br X, label False, True Value *X = nullptr; @@ -3361,12 +3389,6 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { I->moveBefore(&*InsertPos); ++NumSunkInst; - // Drop the debug loc of non-inlinable instructions. This prevents - // single-stepping from going backwards. See HowToUpdateDebugInfo.rst for - // the full rationale. - if (!isa(I)) - I->setDebugLoc(DebugLoc()); - // Also sink all related debug uses from the source basic block. Otherwise we // get debug use before the def. Attempt to salvage debug uses first, to // maximise the range variables have location for. If we cannot salvage, then diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index ee09a4d9db7e1..647d25e6a24e0 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -434,6 +434,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsKasan) { bool IsAndroid = TargetTriple.isAndroid(); bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS(); + bool IsMacOS = TargetTriple.isMacOSX(); bool IsFreeBSD = TargetTriple.isOSFreeBSD(); bool IsNetBSD = TargetTriple.isOSNetBSD(); bool IsPS4CPU = TargetTriple.isPS4CPU(); @@ -510,6 +511,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, Mapping.Offset = kMIPS64_ShadowOffset64; else if (IsIOS) Mapping.Offset = kDynamicShadowSentinel; + else if (IsMacOS && IsAArch64) + Mapping.Offset = kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; else @@ -2103,23 +2106,10 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } - // This should never be called when there are no globals, by the logic that - 
// computes the UniqueModuleId string, which is "" when there are no globals. - // It's important that this path is only used when there are actually some - // globals, because that means that there will certainly be a live - // `asan_globals` input section at link time and thus `__start_asan_globals` - // and `__stop_asan_globals` symbols will definitely be defined at link time. - // This means there's no need for the references to them to be weak, which - // enables better code generation because ExternalWeakLinkage implies - // isInterposable() and thus requires GOT indirection for PIC. Since these - // are known-defined hidden/dso_local symbols, direct PIC accesses without - // dynamic relocation are always sufficient. - assert(!MetadataGlobals.empty()); - assert(!UniqueModuleId.empty()); - // Update llvm.compiler.used, adding the new metadata globals. This is // needed so that during LTO these variables stay alive. - appendToCompilerUsed(M, MetadataGlobals); + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); // RegisteredFlag serves two purposes. First, we can pass it to dladdr() // to look up the loaded image that contains it. Second, we can store in it @@ -2132,18 +2122,15 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); - // Create start and stop symbols. These are known to be defined by - // the linker, see comment above. 
- auto MakeStartStopGV = [&](const char *Prefix) { - GlobalVariable *StartStop = - new GlobalVariable(M, IntptrTy, false, GlobalVariable::ExternalLinkage, - nullptr, Prefix + getGlobalMetadataSection()); - StartStop->setVisibility(GlobalVariable::HiddenVisibility); - assert(StartStop->isImplicitDSOLocal()); - return StartStop; - }; - GlobalVariable *StartELFMetadata = MakeStartStopGV("__start_"); - GlobalVariable *StopELFMetadata = MakeStartStopGV("__stop_"); + // Create start and stop symbols. + GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); // Create a call to register the globals with the runtime. 
IRB.CreateCall(AsanRegisterElfGlobals, diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index d8a965a90127b..cd2ea8d5e4edd 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -130,7 +130,6 @@ class GCOVProfiler { Function * insertCounterWriteout(ArrayRef>); Function *insertReset(ArrayRef>); - Function *insertFlush(Function *ResetF); bool AddFlushBeforeForkAndExec(); @@ -909,7 +908,6 @@ bool GCOVProfiler::emitProfileArcs() { Function *WriteoutF = insertCounterWriteout(CountersBySP); Function *ResetF = insertReset(CountersBySP); - Function *FlushF = insertFlush(ResetF); // Create a small bit of code that registers the "__llvm_gcov_writeout" to // be executed at exit and the "__llvm_gcov_flush" function to be executed @@ -927,14 +925,13 @@ bool GCOVProfiler::emitProfileArcs() { IRBuilder<> Builder(BB); FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Type *Params[] = {PointerType::get(FTy, 0), PointerType::get(FTy, 0), - PointerType::get(FTy, 0)}; - FTy = FunctionType::get(Builder.getVoidTy(), Params, false); + auto *PFTy = PointerType::get(FTy, 0); + FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false); // Initialize the environment and register the local writeout, flush and // reset functions. 
FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); - Builder.CreateCall(GCOVInit, {WriteoutF, FlushF, ResetF}); + Builder.CreateCall(GCOVInit, {WriteoutF, ResetF}); Builder.CreateRetVoid(); appendToGlobalCtors(*M, F, 0); @@ -1266,36 +1263,3 @@ Function *GCOVProfiler::insertReset( return ResetF; } - -Function *GCOVProfiler::insertFlush(Function *ResetF) { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__llvm_gcov_flush"); - if (!FlushF) - FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_flush", M); - FlushF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - FlushF->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - FlushF->addFnAttr(Attribute::NoRedZone); - - BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); - - // Write out the current counters. - Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); - assert(WriteoutF && "Need to create the writeout function first!"); - - IRBuilder<> Builder(Entry); - Builder.CreateCall(WriteoutF, {}); - Builder.CreateCall(ResetF, {}); - - Type *RetTy = FlushF->getReturnType(); - if (RetTy->isVoidTy()) - Builder.CreateRetVoid(); - else if (RetTy->isIntegerTy()) - // Used if __llvm_gcov_flush was implicitly declared. - Builder.CreateRet(ConstantInt::get(RetTy, 0)); - else - report_fatal_error("invalid return type for __llvm_gcov_flush"); - - return FlushF; -} diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 7b03bbfcdfe4b..0a3519502994b 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -57,6 +57,8 @@ using namespace llvm; #define DEBUG_TYPE "instrprof" +// FIXME: These are to be removed after switching to the new memop value +// profiling. 
// The start and end values of precise value profile range for memory // intrinsic sizes cl::opt MemOPSizeRange( @@ -72,6 +74,12 @@ cl::opt MemOPSizeLarge( "Value of 0 disables the large value profiling."), cl::init(8192)); +cl::opt UseOldMemOpValueProf( + "use-old-memop-value-prof", + cl::desc("Use the old memop value profiling buckets. This is " + "transitional and to be removed after switching. "), + cl::init(true)); + namespace { cl::opt DoHashBasedCounterSplit( @@ -395,6 +403,19 @@ class PGOCounterPromoter { BlockFrequencyInfo *BFI; }; +enum class ValueProfilingCallType { + // Individual values are tracked. Currently used for indiret call target + // profiling. + Default, + + // The old memop size value profiling. FIXME: To be removed after switching to + // the new one. + OldMemOp, + + // MemOp: the (new) memop size value profiling with extended buckets. + MemOp +}; + } // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { @@ -579,9 +600,9 @@ bool InstrProfiling::run( return true; } -static FunctionCallee -getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, - bool IsRange = false) { +static FunctionCallee getOrInsertValueProfilingCall( + Module &M, const TargetLibraryInfo &TLI, + ValueProfilingCallType CallType = ValueProfilingCallType::Default) { LLVMContext &Ctx = M.getContext(); auto *ReturnTy = Type::getVoidTy(M.getContext()); @@ -589,16 +610,22 @@ getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, if (auto AK = TLI.getExtAttrForI32Param(false)) AL = AL.addParamAttribute(M.getContext(), 2, AK); - if (!IsRange) { + if (CallType == ValueProfilingCallType::Default || + CallType == ValueProfilingCallType::MemOp) { Type *ParamTypes[] = { #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType #include "llvm/ProfileData/InstrProfData.inc" }; auto *ValueProfilingCallTy = FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); - return 
M.getOrInsertFunction(getInstrProfValueProfFuncName(), - ValueProfilingCallTy, AL); + StringRef FuncName = CallType == ValueProfilingCallType::Default + ? getInstrProfValueProfFuncName() + : getInstrProfValueProfMemOpFuncName(); + return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL); } else { + // FIXME: This code is to be removed after switching to the new memop value + // profiling. + assert(CallType == ValueProfilingCallType::OldMemOp); Type *RangeParamTypes[] = { #define VALUE_RANGE_PROF 1 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType @@ -638,8 +665,8 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Index += It->second.NumValueSites[Kind]; IRBuilder<> Builder(Ind); - bool IsRange = (Ind->getValueKind()->getZExtValue() == - llvm::InstrProfValueKind::IPVK_MemOPSize); + bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() == + llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; auto *TLI = &GetTLI(*Ind->getFunction()); @@ -649,12 +676,19 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { // WinEHPrepare pass. 
SmallVector OpBundles; Ind->getOperandBundlesAsDefs(OpBundles); - if (!IsRange) { + if (!IsMemOpSize) { Value *Args[3] = {Ind->getTargetValue(), Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), Builder.getInt32(Index)}; Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args, OpBundles); + } else if (!UseOldMemOpValueProf) { + Value *Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Call = Builder.CreateCall( + getOrInsertValueProfilingCall(*M, *TLI, ValueProfilingCallType::MemOp), + Args, OpBundles); } else { Value *Args[6] = { Ind->getTargetValue(), @@ -663,7 +697,8 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Builder.getInt64(MemOPSizeRangeStart), Builder.getInt64(MemOPSizeRangeLast), Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)}; - Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), + Call = Builder.CreateCall(getOrInsertValueProfilingCall( + *M, *TLI, ValueProfilingCallType::OldMemOp), Args, OpBundles); } if (auto AK = TLI->getExtAttrForI32Param(false)) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index f825cf99205b3..fcf7f470b3e10 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -284,6 +284,11 @@ static cl::opt ClCheckAccessAddress("msan-check-access-address", cl::desc("report accesses through a pointer which has poisoned shadow"), cl::Hidden, cl::init(true)); +static cl::opt ClEagerChecks( + "msan-eager-checks", + cl::desc("check arguments and return values at function call boundaries"), + cl::Hidden, cl::init(false)); + static cl::opt ClDumpStrictInstructions("msan-dump-strict-instructions", cl::desc("print out instructions with default strict semantics"), cl::Hidden, cl::init(false)); @@ -1052,7 +1057,6 @@ struct 
MemorySanitizerVisitor : public InstVisitor { bool PropagateShadow; bool PoisonStack; bool PoisonUndef; - bool CheckReturnValue; struct ShadowOriginAndInsertPoint { Value *Shadow; @@ -1076,9 +1080,6 @@ struct MemorySanitizerVisitor : public InstVisitor { PropagateShadow = SanitizeFunction; PoisonStack = SanitizeFunction && ClPoisonStack; PoisonUndef = SanitizeFunction && ClPoisonUndef; - // FIXME: Consider using SpecialCaseList to specify a list of functions that - // must always return fully initialized values. For now, we hardcode "main". - CheckReturnValue = SanitizeFunction && (F.getName() == "main"); MS.initializeCallbacks(*F.getParent()); if (MS.CompileKernel) @@ -1618,14 +1619,23 @@ struct MemorySanitizerVisitor : public InstVisitor { LLVM_DEBUG(dbgs() << "Arg is not sized\n"); continue; } + + bool FArgByVal = FArg.hasByValAttr(); + bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef); + bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef; unsigned Size = FArg.hasByValAttr() ? DL.getTypeAllocSize(FArg.getParamByValType()) : DL.getTypeAllocSize(FArg.getType()); + if (A == &FArg) { bool Overflow = ArgOffset + Size > kParamTLSSize; - Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); - if (FArg.hasByValAttr()) { + if (FArgEagerCheck) { + *ShadowPtr = getCleanShadow(V); + setOrigin(A, getCleanOrigin()); + continue; + } else if (FArgByVal) { + Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); // ByVal pointer itself has clean shadow. We copy the actual // argument shadow to the underlying memory. // Figure out maximal valid memcpy alignment. @@ -1650,6 +1660,8 @@ struct MemorySanitizerVisitor : public InstVisitor { } *ShadowPtr = getCleanShadow(V); } else { + // Shadow over TLS + Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); if (Overflow) { // ParamTLS overflow. 
*ShadowPtr = getCleanShadow(V); @@ -1668,7 +1680,9 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(A, getCleanOrigin()); } } - ArgOffset += alignTo(Size, kShadowTLSAlignment); + + if (!FArgEagerCheck) + ArgOffset += alignTo(Size, kShadowTLSAlignment); } assert(*ShadowPtr && "Could not find shadow for an argument"); return *ShadowPtr; @@ -2875,6 +2889,50 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + // Instrument generic vector reduction intrinsics + // by ORing together all their fields. + void handleVectorReduceIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOrReduce(getShadow(&I, 0)); + setShadow(&I, S); + setOrigin(&I, getOrigin(&I, 0)); + } + + // Instrument experimental.vector.reduce.or intrinsic. + // Valid (non-poisoned) set bits in the operand pull low the + // corresponding shadow bits. + void handleVectorReduceOrIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *OperandShadow = getShadow(&I, 0); + Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0)); + Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow); + // Bit N is clean if any field's bit N is 1 and unpoison + Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison); + // Otherwise, it is clean if every field's bit N is unpoison + Value *OrShadow = IRB.CreateOrReduce(OperandShadow); + Value *S = IRB.CreateAnd(OutShadowMask, OrShadow); + + setShadow(&I, S); + setOrigin(&I, getOrigin(&I, 0)); + } + + // Instrument experimental.vector.reduce.or intrinsic. + // Valid (non-poisoned) unset bits in the operand pull down the + // corresponding shadow bits. 
+ void handleVectorReduceAndIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value *OperandShadow = getShadow(&I, 0); + Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow); + // Bit N is clean if any field's bit N is 0 and unpoison + Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison); + // Otherwise, it is clean if every field's bit N is unpoison + Value *OrShadow = IRB.CreateOrReduce(OperandShadow); + Value *S = IRB.CreateAnd(OutShadowMask, OrShadow); + + setShadow(&I, S); + setOrigin(&I, getOrigin(&I, 0)); + } + void handleStmxcsr(IntrinsicInst &I) { IRBuilder<> IRB(&I); Value* Addr = I.getArgOperand(0); @@ -3093,6 +3151,17 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::masked_load: handleMaskedLoad(I); break; + case Intrinsic::experimental_vector_reduce_and: + handleVectorReduceAndIntrinsic(I); + break; + case Intrinsic::experimental_vector_reduce_or: + handleVectorReduceOrIntrinsic(I); + break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_mul: + handleVectorReduceIntrinsic(I); + break; case Intrinsic::x86_sse_stmxcsr: handleStmxcsr(I); break; @@ -3391,7 +3460,18 @@ struct MemorySanitizerVisitor : public InstVisitor { << " Shadow: " << *ArgShadow << "\n"); bool ArgIsInitialized = false; const DataLayout &DL = F.getParent()->getDataLayout(); - if (CB.paramHasAttr(i, Attribute::ByVal)) { + + bool ByVal = CB.paramHasAttr(i, Attribute::ByVal); + bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef); + bool EagerCheck = ClEagerChecks && !ByVal && NoUndef; + + if (EagerCheck) { + insertShadowCheck(A, &CB); + continue; + } + if (ByVal) { + // ByVal requires some special handling as it's too big for a single + // load assert(A->getType()->isPointerTy() && "ByVal argument is not a pointer!"); Size = DL.getTypeAllocSize(CB.getParamByValType(i)); @@ -3409,6 +3489,7 @@ struct MemorySanitizerVisitor : public 
InstVisitor { Alignment, Size); // TODO(glider): need to copy origins. } else { + // Any other parameters mean we need bit-grained tracking of uninit data Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, @@ -3437,6 +3518,13 @@ struct MemorySanitizerVisitor : public InstVisitor { // Don't emit the epilogue for musttail call returns. if (isa(CB) && cast(CB).isMustTailCall()) return; + + if (ClEagerChecks && CB.hasRetAttr(Attribute::NoUndef)) { + setShadow(&CB, getCleanShadow(&CB)); + setOrigin(&CB, getCleanOrigin()); + return; + } + IRBuilder<> IRBBefore(&CB); // Until we have full dynamic coverage, make sure the retval shadow is 0. Value *Base = getShadowPtrForRetval(&CB, IRBBefore); @@ -3489,14 +3577,26 @@ struct MemorySanitizerVisitor : public InstVisitor { // Don't emit the epilogue for musttail call returns. if (isAMustTailRetVal(RetVal)) return; Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); - if (CheckReturnValue) { + bool HasNoUndef = + F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef); + bool StoreShadow = !(ClEagerChecks && HasNoUndef); + // FIXME: Consider using SpecialCaseList to specify a list of functions that + // must always return fully initialized values. For now, we hardcode "main". 
+ bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main"); + + Value *Shadow = getShadow(RetVal); + bool StoreOrigin = true; + if (EagerCheck) { insertShadowCheck(RetVal, &I); - Value *Shadow = getCleanShadow(RetVal); - IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); - } else { - Value *Shadow = getShadow(RetVal); + Shadow = getCleanShadow(RetVal); + StoreOrigin = false; + } + + // The caller may still expect information passed over TLS if we pass our + // check + if (StoreShadow) { IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); - if (MS.TrackOrigins) + if (MS.TrackOrigins && StoreOrigin) IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); } } diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 2b7b859891dcd..43a1434ae2d37 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -38,6 +38,8 @@ #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/ProfileData/InstrProf.h" +#define INSTR_PROF_VALUE_PROF_MEMOP_API +#include "llvm/ProfileData/InstrProfData.inc" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -89,17 +91,25 @@ static cl::opt cl::desc("Scale the memop size counts using the basic " " block count value")); +// FIXME: These are to be removed after switching to the new memop value +// profiling. // This option sets the rangge of precise profile memop sizes. 
extern cl::opt MemOPSizeRange; // This option sets the value that groups large memop sizes extern cl::opt MemOPSizeLarge; +extern cl::opt UseOldMemOpValueProf; + cl::opt MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls")); +static cl::opt + MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), + cl::desc("Optimize the memop size <= this value")); + namespace { class PGOMemOPSizeOptLegacyPass : public FunctionPass { public: @@ -269,6 +279,8 @@ class MemOPSizeOpt : public InstVisitor { TargetLibraryInfo &TLI; bool Changed; std::vector WorkList; + // FIXME: These are to be removed after switching to the new memop value + // profiling. // Start of the previse range. int64_t PreciseRangeStart; // Last value of the previse range. @@ -277,6 +289,8 @@ class MemOPSizeOpt : public InstVisitor { std::unique_ptr ValueDataArray; bool perform(MemOp MO); + // FIXME: This is to be removed after switching to the new memop value + // profiling. // This kind shows which group the value falls in. For PreciseValue, we have // the profile count for that value. LargeGroup groups the values that are in // range [LargeValue, +inf). NonLargeGroup groups the rest of values. @@ -365,8 +379,11 @@ bool MemOPSizeOpt::perform(MemOp MO) { if (MemOPScaleCount) C = getScaledCount(C, ActualCount, SavedTotalCount); - // Only care precise value here. - if (getMemOPSizeKind(V) != PreciseValue) + if (UseOldMemOpValueProf) { + // Only care precise value here. + if (getMemOPSizeKind(V) != PreciseValue) + continue; + } else if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize) continue; // ValueCounts are sorted on the count. 
Break at the first un-profitable diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp index 85e096112fca1..fa97a194ea2b5 100644 --- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -297,7 +297,7 @@ static bool rewrite(Function &F) { for (Value *V : I.operands()) Checks.push_back(getPoisonFor(ValToPoison, V)); - if (canCreatePoison(&I)) + if (canCreatePoison(cast(&I))) generateCreationChecks(I, Checks); ValToPoison[&I] = buildOrChain(B, Checks); } diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c911b37afac7e..8ce12c514f0bc 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,7 +19,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -52,30 +53,36 @@ using namespace llvm; #define DEBUG_TYPE "tsan" -static cl::opt ClInstrumentMemoryAccesses( +static cl::opt ClInstrumentMemoryAccesses( "tsan-instrument-memory-accesses", cl::init(true), cl::desc("Instrument memory accesses"), cl::Hidden); -static cl::opt ClInstrumentFuncEntryExit( - "tsan-instrument-func-entry-exit", cl::init(true), - cl::desc("Instrument function entry and exit"), cl::Hidden); -static cl::opt ClHandleCxxExceptions( +static cl::opt + ClInstrumentFuncEntryExit("tsan-instrument-func-entry-exit", cl::init(true), + cl::desc("Instrument function entry and exit"), + cl::Hidden); +static cl::opt ClHandleCxxExceptions( "tsan-handle-cxx-exceptions", cl::init(true), cl::desc("Handle C++ exceptions (insert cleanup 
blocks for unwinding)"), cl::Hidden); -static cl::opt ClInstrumentAtomics( - "tsan-instrument-atomics", cl::init(true), - cl::desc("Instrument atomics"), cl::Hidden); -static cl::opt ClInstrumentMemIntrinsics( +static cl::opt ClInstrumentAtomics("tsan-instrument-atomics", + cl::init(true), + cl::desc("Instrument atomics"), + cl::Hidden); +static cl::opt ClInstrumentMemIntrinsics( "tsan-instrument-memintrinsics", cl::init(true), cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); -static cl::opt ClDistinguishVolatile( +static cl::opt ClDistinguishVolatile( "tsan-distinguish-volatile", cl::init(false), cl::desc("Emit special instrumentation for accesses to volatiles"), cl::Hidden); -static cl::opt ClInstrumentReadBeforeWrite( +static cl::opt ClInstrumentReadBeforeWrite( "tsan-instrument-read-before-write", cl::init(false), cl::desc("Do not eliminate read instrumentation for read-before-writes"), cl::Hidden); +static cl::opt ClCompoundReadBeforeWrite( + "tsan-compound-read-before-write", cl::init(false), + cl::desc("Emit special compound instrumentation for reads-before-writes"), + cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -101,15 +108,37 @@ namespace { /// ensures the __tsan_init function is in the list of global constructors for /// the module. struct ThreadSanitizer { + ThreadSanitizer() { + // Sanity check options and warn user. + if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) { + errs() + << "warning: Option -tsan-compound-read-before-write has no effect " + "when -tsan-instrument-read-before-write is set.\n"; + } + } + bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); private: + // Internal Instruction wrapper that contains more information about the + // Instruction from prior analysis. 
+ struct InstructionInfo { + // Instrumentation emitted for this instruction is for a compounded set of + // read and write operations in the same basic block. + static constexpr unsigned kCompoundRW = (1U << 0); + + explicit InstructionInfo(Instruction *Inst) : Inst(Inst) {} + + Instruction *Inst; + unsigned Flags = 0; + }; + void initialize(Module &M); - bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); + bool instrumentLoadOrStore(const InstructionInfo &II, const DataLayout &DL); bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl &Local, - SmallVectorImpl &All, + SmallVectorImpl &All, const DataLayout &DL); bool addrPointsToConstantData(Value *Addr); int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); @@ -130,6 +159,8 @@ struct ThreadSanitizer { FunctionCallee TsanVolatileWrite[kNumberOfAccessSizes]; FunctionCallee TsanUnalignedVolatileRead[kNumberOfAccessSizes]; FunctionCallee TsanUnalignedVolatileWrite[kNumberOfAccessSizes]; + FunctionCallee TsanCompoundRW[kNumberOfAccessSizes]; + FunctionCallee TsanUnalignedCompoundRW[kNumberOfAccessSizes]; FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes]; FunctionCallee TsanAtomicStore[kNumberOfAccessSizes]; FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1] @@ -268,6 +299,15 @@ void ThreadSanitizer::initialize(Module &M) { TsanUnalignedVolatileWrite[i] = M.getOrInsertFunction( UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + SmallString<64> CompoundRWName("__tsan_read_write" + ByteSizeStr); + TsanCompoundRW[i] = M.getOrInsertFunction( + CompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + + SmallString<64> UnalignedCompoundRWName("__tsan_unaligned_read_write" + + ByteSizeStr); + TsanUnalignedCompoundRW[i] = M.getOrInsertFunction( + UnalignedCompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); + Type *Ty = Type::getIntNTy(M.getContext(), 
BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); @@ -402,34 +442,42 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { // 'Local' is a vector of insns within the same BB (no calls between). // 'All' is a vector of insns that will be instrumented. void ThreadSanitizer::chooseInstructionsToInstrument( - SmallVectorImpl &Local, SmallVectorImpl &All, - const DataLayout &DL) { - SmallPtrSet WriteTargets; + SmallVectorImpl &Local, + SmallVectorImpl &All, const DataLayout &DL) { + DenseMap WriteTargets; // Map of addresses to index in All // Iterate from the end. for (Instruction *I : reverse(Local)) { - if (StoreInst *Store = dyn_cast(I)) { - Value *Addr = Store->getPointerOperand(); - if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) - continue; - WriteTargets.insert(Addr); - } else { - LoadInst *Load = cast(I); - Value *Addr = Load->getPointerOperand(); - if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) - continue; - if (!ClInstrumentReadBeforeWrite && WriteTargets.count(Addr)) { - // We will write to this temp, so no reason to analyze the read. - NumOmittedReadsBeforeWrite++; - continue; + const bool IsWrite = isa(*I); + Value *Addr = IsWrite ? cast(I)->getPointerOperand() + : cast(I)->getPointerOperand(); + + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) + continue; + + if (!IsWrite) { + const auto WriteEntry = WriteTargets.find(Addr); + if (!ClInstrumentReadBeforeWrite && WriteEntry != WriteTargets.end()) { + auto &WI = All[WriteEntry->second]; + // If we distinguish volatile accesses and if either the read or write + // is volatile, do not omit any instrumentation. + const bool AnyVolatile = + ClDistinguishVolatile && (cast(I)->isVolatile() || + cast(WI.Inst)->isVolatile()); + if (!AnyVolatile) { + // We will write to this temp, so no reason to analyze the read. + // Mark the write instruction as compound. 
+ WI.Flags |= InstructionInfo::kCompoundRW; + NumOmittedReadsBeforeWrite++; + continue; + } } + if (addrPointsToConstantData(Addr)) { // Addr points to some constant data -- it can not race with any writes. continue; } } - Value *Addr = isa(*I) - ? cast(I)->getPointerOperand() - : cast(I)->getPointerOperand(); + if (isa(GetUnderlyingObject(Addr, DL)) && !PointerMayBeCaptured(Addr, true, true)) { // The variable is addressable but not captured, so it cannot be @@ -438,7 +486,14 @@ void ThreadSanitizer::chooseInstructionsToInstrument( NumOmittedNonCaptured++; continue; } - All.push_back(I); + + // Instrument this instruction. + All.emplace_back(I); + if (IsWrite) { + // For read-before-write and compound instrumentation we only need one + // write target, and we can override any previous entry if it exists. + WriteTargets[Addr] = All.size() - 1; + } } Local.clear(); } @@ -479,7 +534,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, if (F.hasFnAttribute(Attribute::Naked)) return false; initialize(*F.getParent()); - SmallVector AllLoadsAndStores; + SmallVector AllLoadsAndStores; SmallVector LocalLoadsAndStores; SmallVector AtomicAccesses; SmallVector MemIntrinCalls; @@ -514,8 +569,8 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, // Instrument memory accesses only if we want to report bugs in the function. if (ClInstrumentMemoryAccesses && SanitizeFunction) - for (auto Inst : AllLoadsAndStores) { - Res |= instrumentLoadOrStore(Inst, DL); + for (const auto &II : AllLoadsAndStores) { + Res |= instrumentLoadOrStore(II, DL); } // Instrument atomic memory accesses in any case (they can be used to @@ -553,13 +608,12 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, return Res; } -bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, +bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II, const DataLayout &DL) { - IRBuilder<> IRB(I); - bool IsWrite = isa(*I); - Value *Addr = IsWrite - ? 
cast(I)->getPointerOperand() - : cast(I)->getPointerOperand(); + IRBuilder<> IRB(II.Inst); + const bool IsWrite = isa(*II.Inst); + Value *Addr = IsWrite ? cast(II.Inst)->getPointerOperand() + : cast(II.Inst)->getPointerOperand(); // swifterror memory addresses are mem2reg promoted by instruction selection. // As such they cannot have regular uses like an instrumentation function and @@ -570,9 +624,9 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - if (IsWrite && isVtableAccess(I)) { - LLVM_DEBUG(dbgs() << " VPTR : " << *I << "\n"); - Value *StoredValue = cast(I)->getValueOperand(); + if (IsWrite && isVtableAccess(II.Inst)) { + LLVM_DEBUG(dbgs() << " VPTR : " << *II.Inst << "\n"); + Value *StoredValue = cast(II.Inst)->getValueOperand(); // StoredValue may be a vector type if we are storing several vptrs at once. // In this case, just take the first element of the vector since this is // enough to find vptr races. @@ -588,36 +642,46 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, NumInstrumentedVtableWrites++; return true; } - if (!IsWrite && isVtableAccess(I)) { + if (!IsWrite && isVtableAccess(II.Inst)) { IRB.CreateCall(TsanVptrLoad, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); NumInstrumentedVtableReads++; return true; } - const unsigned Alignment = IsWrite - ? cast(I)->getAlignment() - : cast(I)->getAlignment(); - const bool IsVolatile = - ClDistinguishVolatile && (IsWrite ? cast(I)->isVolatile() - : cast(I)->isVolatile()); + + const unsigned Alignment = IsWrite ? cast(II.Inst)->getAlignment() + : cast(II.Inst)->getAlignment(); + const bool IsCompoundRW = + ClCompoundReadBeforeWrite && (II.Flags & InstructionInfo::kCompoundRW); + const bool IsVolatile = ClDistinguishVolatile && + (IsWrite ? 
cast(II.Inst)->isVolatile() + : cast(II.Inst)->isVolatile()); + assert((!IsVolatile || !IsCompoundRW) && "Compound volatile invalid!"); + Type *OrigTy = cast(Addr->getType())->getElementType(); const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); FunctionCallee OnAccessFunc = nullptr; if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) { - if (IsVolatile) + if (IsCompoundRW) + OnAccessFunc = TsanCompoundRW[Idx]; + else if (IsVolatile) OnAccessFunc = IsWrite ? TsanVolatileWrite[Idx] : TsanVolatileRead[Idx]; else OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; } else { - if (IsVolatile) + if (IsCompoundRW) + OnAccessFunc = TsanUnalignedCompoundRW[Idx]; + else if (IsVolatile) OnAccessFunc = IsWrite ? TsanUnalignedVolatileWrite[Idx] : TsanUnalignedVolatileRead[Idx]; else OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx]; } IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); - if (IsWrite) NumInstrumentedWrites++; - else NumInstrumentedReads++; + if (IsCompoundRW || IsWrite) + NumInstrumentedWrites++; + if (IsCompoundRW || !IsWrite) + NumInstrumentedReads++; return true; } diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index cd2f4ca36f3bb..48968166c605f 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -601,14 +601,21 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { return true; } -static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { - Constant *Zero = ConstantInt::get(SDI->getType(), 0); - for (Value *O : SDI->operands()) { - auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI); - if (Result != LazyValueInfo::True) - return false; - } - return true; +static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = 
ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + +static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + +static bool allOperandsAreNonNegative(BinaryOperator *SDI, LazyValueInfo *LVI) { + return all_of(SDI->operands(), + [&](Value *Op) { return isNonNegative(Op, LVI, SDI); }); } /// Try to shrink a udiv/urem's width down to the smallest power of two that's @@ -654,7 +661,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { } static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) + if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI)) return false; ++NumSRems; @@ -671,24 +678,65 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { } /// See if LazyValueInfo's ability to exploit edge conditions or range -/// information is sufficient to prove the both operands of this SDiv are -/// positive. If this is the case, replace the SDiv with a UDiv. Even for local +/// information is sufficient to prove the signs of both operands of this SDiv. +/// If this is the case, replace the SDiv with a UDiv. Even for local /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. 
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) + if (SDI->getType()->isVectorTy()) return false; + enum class Domain { NonNegative, NonPositive, Unknown }; + auto getDomain = [&](Value *V) { + if (isNonNegative(V, LVI, SDI)) + return Domain::NonNegative; + if (isNonPositive(V, LVI, SDI)) + return Domain::NonPositive; + return Domain::Unknown; + }; + + struct Operand { + Value *V; + Domain D; + }; + std::array Ops; + for (const auto I : zip(Ops, SDI->operands())) { + Operand &Op = std::get<0>(I); + Op.V = std::get<1>(I); + Op.D = getDomain(Op.V); + if (Op.D == Domain::Unknown) + return false; + } + + // We know domains of both of the operands! ++NumSDivs; - auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1), - SDI->getName(), SDI); - BO->setDebugLoc(SDI->getDebugLoc()); - BO->setIsExact(SDI->isExact()); - SDI->replaceAllUsesWith(BO); + + // We need operands to be non-negative, so negate each one that isn't. + for (Operand &Op : Ops) { + if (Op.D == Domain::NonNegative) + continue; + auto *BO = + BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + BO->setDebugLoc(SDI->getDebugLoc()); + Op.V = BO; + } + + auto *UDiv = + BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI); + UDiv->setDebugLoc(SDI->getDebugLoc()); + UDiv->setIsExact(SDI->isExact()); + + Value *Res = UDiv; + + // If the operands had two different domains, we need to negate the result. + if (Ops[0].D != Ops[1].D) + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + + SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); // Try to simplify our new udiv. 
- processUDivOrURem(BO, LVI); + processUDivOrURem(UDiv, LVI); return true; } @@ -697,9 +745,7 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; - Constant *Zero = ConstantInt::get(SDI->getType(), 0); - if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) != - LazyValueInfo::True) + if (!isNonNegative(SDI->getOperand(0), LVI, SDI)) return false; ++NumAShrs; @@ -719,9 +765,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { Value *Base = SDI->getOperand(0); - Constant *Zero = ConstantInt::get(Base->getType(), 0); - if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) != - LazyValueInfo::True) + if (!isNonNegative(Base, LVI, SDI)) return false; ++NumSExt; diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index e58db03225eea..258fd5b9454f0 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -848,7 +848,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. for (Argument &AI : BB.getParent()->args()) - if (AI.hasPassPointeeByValueAttr()) + if (AI.hasPassPointeeByValueCopyAttr()) DeadStackObjects.insert(&AI); const DataLayout &DL = BB.getModule()->getDataLayout(); @@ -1563,7 +1563,7 @@ struct DSEState { // Treat byval or inalloca arguments the same as Allocas, stores to them are // dead at the end of the function. for (Argument &AI : F.args()) - if (AI.hasPassPointeeByValueAttr()) { + if (AI.hasPassPointeeByValueCopyAttr()) { // For byval, the caller doesn't know the address of the allocation. 
if (AI.hasByValAttr()) State.InvisibleToCallerBeforeRet.insert(&AI); @@ -1593,6 +1593,12 @@ struct DSEState { break; } } + switch (CB->getIntrinsicID()) { + case Intrinsic::init_trampoline: + return {MemoryLocation(CB->getArgOperand(0))}; + default: + break; + } return None; } diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index b16f8591b5a46..0b416cc4afb86 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -667,6 +667,19 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap& d) const { } #endif +enum class AvaliabilityState : char { + /// We know the block *is not* fully available. This is a fixpoint. + Unavaliable = 0, + /// We know the block *is* fully available. This is a fixpoint. + Avaliable = 1, + /// We do not know whether the block is fully available or not, + /// but we are currently speculating that it will be. + SpeculativelyAvaliable = 2, + /// We are speculating for this block and have used that + /// to speculate for other blocks. + SpeculativelyAvaliableAndUsedForSpeculation = 3, +}; + /// Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This @@ -677,24 +690,27 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap& d) const { /// currently speculating that it will be. /// 3) we are speculating for this block and have used that to speculate for /// other blocks. -static bool IsValueFullyAvailableInBlock(BasicBlock *BB, - DenseMap &FullyAvailableBlocks, - uint32_t RecurseDepth) { +static bool IsValueFullyAvailableInBlock( + BasicBlock *BB, + DenseMap &FullyAvailableBlocks, + uint32_t RecurseDepth) { if (RecurseDepth > MaxRecurseDepth) return false; - // Optimistically assume that the block is fully available and check to see - // if we already know about this block in one lookup. 
- std::pair::iterator, bool> IV = - FullyAvailableBlocks.insert(std::make_pair(BB, 2)); + // Optimistically assume that the block is speculatively available and check + // to see if we already know about this block in one lookup. + std::pair::iterator, bool> IV = + FullyAvailableBlocks.insert( + std::make_pair(BB, AvaliabilityState::SpeculativelyAvaliable)); // If the entry already existed for this block, return the precomputed value. if (!IV.second) { // If this is a speculative "available" value, mark it as being used for // speculation of other blocks. - if (IV.first->second == 2) - IV.first->second = 3; - return IV.first->second != 0; + if (IV.first->second == AvaliabilityState::SpeculativelyAvaliable) + IV.first->second = + AvaliabilityState::SpeculativelyAvaliableAndUsedForSpeculation; + return IV.first->second != AvaliabilityState::Unavaliable; } // Otherwise, see if it is fully available in all predecessors. @@ -717,29 +733,30 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: - char &BBVal = FullyAvailableBlocks[BB]; + AvaliabilityState &BBVal = FullyAvailableBlocks[BB]; - // If we didn't speculate on this, just return with it set to false. - if (BBVal == 2) { - BBVal = 0; + // If we didn't speculate on this, just return with it set to unavaliable. + if (BBVal == AvaliabilityState::SpeculativelyAvaliable) { + BBVal = AvaliabilityState::Unavaliable; return false; } - // If we did speculate on this value, we could have blocks set to 1 that are - // incorrect. Walk the (transitive) successors of this block and mark them as - // 0 if set to one. + // If we did speculate on this value, we could have blocks set to + // speculatively avaliable that are incorrect. Walk the (transitive) + // successors of this block and mark them as unavaliable instead. 
SmallVector BBWorklist; BBWorklist.push_back(BB); do { BasicBlock *Entry = BBWorklist.pop_back_val(); - // Note that this sets blocks to 0 (unavailable) if they happen to not + // Note that this sets blocks to unavailable if they happen to not // already be in FullyAvailableBlocks. This is safe. - char &EntryVal = FullyAvailableBlocks[Entry]; - if (EntryVal == 0) continue; // Already unavailable. + AvaliabilityState &EntryVal = FullyAvailableBlocks[Entry]; + if (EntryVal == AvaliabilityState::Unavaliable) + continue; // Already unavailable. // Mark as unavailable. - EntryVal = 0; + EntryVal = AvaliabilityState::Unavaliable; BBWorklist.append(succ_begin(Entry), succ_end(Entry)); } while (!BBWorklist.empty()); @@ -1107,11 +1124,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check to see how many predecessors have the loaded value fully // available. MapVector PredLoads; - DenseMap FullyAvailableBlocks; + DenseMap FullyAvailableBlocks; for (const AvailableValueInBlock &AV : ValuesPerBlock) - FullyAvailableBlocks[AV.BB] = true; + FullyAvailableBlocks[AV.BB] = AvaliabilityState::Avaliable; for (BasicBlock *UnavailableBB : UnavailableBlocks) - FullyAvailableBlocks[UnavailableBB] = false; + FullyAvailableBlocks[UnavailableBB] = AvaliabilityState::Unavaliable; SmallVector CriticalEdgePred; for (BasicBlock *Pred : predecessors(LoadBB)) { diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 0357d905fde52..51d12faf712ad 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1436,11 +1436,18 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // expect a well-formed cyclic phi-with-increments. i.e. any operand not part // of the phi-SCC dominates the loop entry. 
Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); - WidePhi = dyn_cast(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); + Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); // If the wide phi is not a phi node, for example a cast node, like bitcast, // inttoptr, ptrtoint, just skip for now. - if (!WidePhi) + if (!(WidePhi = dyn_cast(ExpandInst))) { + // if the cast node is an inserted instruction without any user, we should + // remove it to make sure the pass don't touch the function as we can not + // wide the phi. + if (ExpandInst->hasNUses(0) && + Rewriter.isInsertedInstruction(cast(ExpandInst))) + DeadInsts.emplace_back(ExpandInst); return nullptr; + } // Remembering the WideIV increment generated by SCEVExpander allows // widenIVUse to reuse it when widening the narrow IV's increment. We don't @@ -2223,7 +2230,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - DeadInsts.push_back(OrigCond); + DeadInsts.emplace_back(OrigCond); ++NumLFTR; return true; @@ -2415,7 +2422,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { IsTaken ? 
ExitIfTrue : !ExitIfTrue); BI->setCondition(NewCond); if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + DeadInsts.emplace_back(OldCond); }; bool Changed = false; @@ -2641,7 +2648,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { Value *OldCond = BI->getCondition(); BI->setCondition(NewCond); if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + DeadInsts.emplace_back(OldCond); Changed = true; } diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3cb4df12e9b08..aaf2840f8ff6f 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1010,6 +1010,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( return true; } +namespace { class ExpandedValuesCleaner { SCEVExpander &Expander; TargetLibraryInfo *TLI; @@ -1032,6 +1033,7 @@ class ExpandedValuesCleaner { } } }; +} // namespace /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 0ed1773373a71..cfadfbb585b9f 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1539,90 +1539,39 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n"); - auto *PWC = dyn_cast(PI); - if (!PWC) + const Optional &Constraint = PI->getConstraint(); + if (!Constraint) return nullptr; - auto *CopyOf = I->getOperand(0); - auto *Cond = PWC->Condition; - - // If this a copy of the condition, it must be either true or false depending - // on the predicate info type and edge. - if (CopyOf == Cond) { - // We should not need to add predicate users because the predicate info is - // already a use of this operand. 
- if (isa(PI)) - return createConstantExpression(ConstantInt::getTrue(Cond->getType())); - if (auto *PBranch = dyn_cast(PI)) { - if (PBranch->TrueEdge) - return createConstantExpression(ConstantInt::getTrue(Cond->getType())); - return createConstantExpression(ConstantInt::getFalse(Cond->getType())); - } - if (auto *PSwitch = dyn_cast(PI)) - return createConstantExpression(cast(PSwitch->CaseValue)); - } + CmpInst::Predicate Predicate = Constraint->Predicate; + Value *CmpOp0 = I->getOperand(0); + Value *CmpOp1 = Constraint->OtherOp; - // Not a copy of the condition, so see what the predicates tell us about this - // value. First, though, we check to make sure the value is actually a copy - // of one of the condition operands. It's possible, in certain cases, for it - // to be a copy of a predicateinfo copy. In particular, if two branch - // operations use the same condition, and one branch dominates the other, we - // will end up with a copy of a copy. This is currently a small deficiency in - // predicateinfo. What will end up happening here is that we will value - // number both copies the same anyway. - - // Everything below relies on the condition being a comparison. - auto *Cmp = dyn_cast(Cond); - if (!Cmp) - return nullptr; + Value *FirstOp = lookupOperandLeader(CmpOp0); + Value *SecondOp = lookupOperandLeader(CmpOp1); + Value *AdditionallyUsedValue = CmpOp0; - if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) { - LLVM_DEBUG(dbgs() << "Copy is not of any condition operands!\n"); - return nullptr; - } - Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); - Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1)); - bool SwappedOps = false; // Sort the ops. if (shouldSwapOperands(FirstOp, SecondOp)) { std::swap(FirstOp, SecondOp); - SwappedOps = true; + Predicate = CmpInst::getSwappedPredicate(Predicate); + AdditionallyUsedValue = CmpOp1; } - CmpInst::Predicate Predicate = - SwappedOps ? 
Cmp->getSwappedPredicate() : Cmp->getPredicate(); - - if (isa(PI)) { - // If we assume the operands are equal, then they are equal. - if (Predicate == CmpInst::ICMP_EQ) { - addPredicateUsers(PI, I); - addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0), - I); - return createVariableOrConstant(FirstOp); - } + + if (Predicate == CmpInst::ICMP_EQ) { + addPredicateUsers(PI, I); + addAdditionalUsers(AdditionallyUsedValue, I); + return createVariableOrConstant(FirstOp); } - if (const auto *PBranch = dyn_cast(PI)) { - // If we are *not* a copy of the comparison, we may equal to the other - // operand when the predicate implies something about equality of - // operations. In particular, if the comparison is true/false when the - // operands are equal, and we are on the right edge, we know this operation - // is equal to something. - if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || - (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { - addPredicateUsers(PI, I); - addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0), - I); - return createVariableOrConstant(FirstOp); - } - // Handle the special case of floating point. - if (((PBranch->TrueEdge && Predicate == CmpInst::FCMP_OEQ) || - (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && - isa(FirstOp) && !cast(FirstOp)->isZero()) { - addPredicateUsers(PI, I); - addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0), - I); - return createConstantExpression(cast(FirstOp)); - } + + // Handle the special case of floating point. 
+ if (Predicate == CmpInst::FCMP_OEQ && isa(FirstOp) && + !cast(FirstOp)->isZero()) { + addPredicateUsers(PI, I); + addAdditionalUsers(AdditionallyUsedValue, I); + return createConstantExpression(cast(FirstOp)); } + return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 5ebd3b71fe78c..11ac7d7e15847 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -233,7 +233,7 @@ class SCCPSolver : public InstVisitor { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) TrackedMultipleRetVals.insert( std::make_pair(std::make_pair(F, i), ValueLatticeElement())); - } else + } else if (!F->getReturnType()->isVoidTy()) TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement())); } @@ -1262,55 +1262,22 @@ void SCCPSolver::handleCallResult(CallBase &CB) { auto *PI = getPredicateInfoFor(&CB); assert(PI && "Missing predicate info for ssa.copy"); - CmpInst *Cmp; - bool TrueEdge; - if (auto *PBranch = dyn_cast(PI)) { - Cmp = dyn_cast(PBranch->Condition); - TrueEdge = PBranch->TrueEdge; - } else if (auto *PAssume = dyn_cast(PI)) { - Cmp = dyn_cast(PAssume->Condition); - TrueEdge = true; - } else { + const Optional &Constraint = PI->getConstraint(); + if (!Constraint) { mergeInValue(ValueState[&CB], &CB, CopyOfVal); return; } - // Everything below relies on the condition being a comparison. - if (!Cmp) { - mergeInValue(ValueState[&CB], &CB, CopyOfVal); - return; - } + CmpInst::Predicate Pred = Constraint->Predicate; + Value *OtherOp = Constraint->OtherOp; - Value *RenamedOp = PI->RenamedOp; - Value *CmpOp0 = Cmp->getOperand(0); - Value *CmpOp1 = Cmp->getOperand(1); - // Bail out if neither of the operands matches RenamedOp. - if (CmpOp0 != RenamedOp && CmpOp1 != RenamedOp) { - mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf)); + // Wait until OtherOp is resolved. 
+ if (getValueState(OtherOp).isUnknown()) { + addAdditionalUser(OtherOp, &CB); return; } - auto Pred = Cmp->getPredicate(); - if (CmpOp1 == RenamedOp) { - std::swap(CmpOp0, CmpOp1); - Pred = Cmp->getSwappedPredicate(); - } - - // Wait until CmpOp1 is resolved. - if (getValueState(CmpOp1).isUnknown()) { - addAdditionalUser(CmpOp1, &CB); - return; - } - - // The code below relies on PredicateInfo only inserting copies for the - // true branch when the branch condition is an AND and only inserting - // copies for the false branch when the branch condition is an OR. This - // ensures we can intersect the range from the condition with the range of - // CopyOf. - if (!TrueEdge) - Pred = CmpInst::getInversePredicate(Pred); - - ValueLatticeElement CondVal = getValueState(CmpOp1); + ValueLatticeElement CondVal = getValueState(OtherOp); ValueLatticeElement &IV = ValueState[&CB]; if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) { auto ImposedCR = @@ -1334,7 +1301,7 @@ void SCCPSolver::handleCallResult(CallBase &CB) { if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement()) NewCR = CopyOfCR; - addAdditionalUser(CmpOp1, &CB); + addAdditionalUser(OtherOp, &CB); // TODO: Actually filp MayIncludeUndef for the created range to false, // once most places in the optimizer respect the branches on // undef/poison are UB rule. The reason why the new range cannot be @@ -1351,7 +1318,7 @@ void SCCPSolver::handleCallResult(CallBase &CB) { } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only // propagate equal constants. 
- addAdditionalUser(CmpOp1, &CB); + addAdditionalUser(OtherOp, &CB); mergeInValue(IV, &CB, CondVal); return; } diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 9d088547b4369..42f79d89f0a28 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -139,7 +139,7 @@ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) { } void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCFGSimplificationPass(1, false, false, true)); + unwrap(PM)->add(createCFGSimplificationPass()); } void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 2e459c9a64d44..99055b9918050 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -40,6 +40,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include using namespace llvm; @@ -212,22 +213,22 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, } // Command-line settings override compile-time settings. -SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) { - Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences() - ? UserBonusInstThreshold - : Opts.BonusInstThreshold; - Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() - ? UserForwardSwitchCond - : Opts.ForwardSwitchCondToPhi; - Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences() - ? UserSwitchToLookup - : Opts.ConvertSwitchToLookupTable; - Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() - ? UserKeepLoops - : Opts.NeedCanonicalLoop; - Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() - ? 
UserSinkCommonInsts - : Opts.SinkCommonInsts; +static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) { + if (UserBonusInstThreshold.getNumOccurrences()) + Options.BonusInstThreshold = UserBonusInstThreshold; + if (UserForwardSwitchCond.getNumOccurrences()) + Options.ForwardSwitchCondToPhi = UserForwardSwitchCond; + if (UserSwitchToLookup.getNumOccurrences()) + Options.ConvertSwitchToLookupTable = UserSwitchToLookup; + if (UserKeepLoops.getNumOccurrences()) + Options.NeedCanonicalLoop = UserKeepLoops; + if (UserSinkCommonInsts.getNumOccurrences()) + Options.SinkCommonInsts = UserSinkCommonInsts; +} + +SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) + : Options(Opts) { + applyCommandLineOverridesToOptions(Options); } PreservedAnalyses SimplifyCFGPass::run(Function &F, @@ -247,33 +248,14 @@ struct CFGSimplifyPass : public FunctionPass { SimplifyCFGOptions Options; std::function PredicateFtor; - CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, - bool SinkCommon = false, + CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(), std::function Ftor = nullptr) - : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { + : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) { initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); // Check for command-line overrides of options for debug/customization. - Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences() - ? UserBonusInstThreshold - : Threshold; - - Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() - ? UserForwardSwitchCond - : ForwardSwitchCond; - - Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences() - ? UserSwitchToLookup - : ConvertSwitch; - - Options.NeedCanonicalLoop = - UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops; - - Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() - ? 
UserSinkCommonInsts - : SinkCommon; + applyCommandLineOverridesToOptions(Options); } bool runOnFunction(Function &F) override { @@ -310,10 +292,7 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, // Public interface to the CFGSimplification pass FunctionPass * -llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, - bool ConvertSwitch, bool KeepLoops, - bool SinkCommon, +llvm::createCFGSimplificationPass(SimplifyCFGOptions Options, std::function Ftor) { - return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, - KeepLoops, SinkCommon, std::move(Ftor)); + return new CFGSimplifyPass(Options, std::move(Ftor)); } diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index f2208edd5b196..7ff73fcdada79 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -6,9 +6,12 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "assume-builder" + #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +22,7 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -33,6 +37,16 @@ cl::opt EnableKnowledgeRetention( cl::desc( "enable preservation of attributes throughout code transformation")); +STATISTIC(NumAssumeBuilt, "Number of assume built by the assume builder"); +STATISTIC(NumBundlesInAssumes, "Total number of Bundles in the assume built"); +STATISTIC(NumAssumesMerged, + "Number of assume merged by the assume simplify pass"); 
+STATISTIC(NumAssumesRemoved, + "Number of assume removed by the assume simplify pass"); + +DEBUG_COUNTER(BuildAssumeCounter, "assume-builder-counter", + "Controls which assumes gets created"); + namespace { bool isUsefullToPreserve(Attribute::AttrKind Kind) { @@ -204,6 +218,8 @@ struct AssumeBuilderState { IntrinsicInst *build() { if (AssumedKnowledgeMap.empty()) return nullptr; + if (!DebugCounter::shouldExecute(BuildAssumeCounter)) + return nullptr; Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume); LLVMContext &C = M->getContext(); SmallVector OpBundle; @@ -220,7 +236,9 @@ struct AssumeBuilderState { OpBundle.push_back(OperandBundleDefT( std::string(Attribute::getNameFromAttrKind(MapElem.first.second)), Args)); + NumBundlesInAssumes++; } + NumAssumeBuilt++; return cast(CallInst::Create( FnAssume, ArrayRef({ConstantInt::getTrue(C)}), OpBundle)); } @@ -328,6 +346,10 @@ struct AssumeSimplify { (!ForceCleanup && !isAssumeWithEmptyBundle(*Assume))) continue; MadeChange = true; + if (ForceCleanup) + NumAssumesMerged++; + else + NumAssumesRemoved++; Assume->eraseFromParent(); } CleanupToDo.clear(); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 085d91031cf90..86b2eb0464cb4 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -758,12 +758,22 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); + + Loop *L = nullptr; + BasicBlock *OldLatch = nullptr; // Splitting the predecessors of a loop header creates a preheader block. - if (LI && LI->isLoopHeader(BB)) + if (LI && LI->isLoopHeader(BB)) { + L = LI->getLoopFor(BB); // Using the loop start line number prevents debuggers stepping into the // loop body for this instruction. 
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); - else + BI->setDebugLoc(L->getStartLoc()); + + // If BB is the header of the Loop, it is possible that the loop is + // modified, such that the current latch does not remain the latch of the + // loop. If that is the case, the loop metadata from the current latch needs + // to be applied to the new latch. + OldLatch = L->getLoopLatch(); + } else BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); // Move the edges from Preds to point to NewBB instead of BB. @@ -798,6 +808,15 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); } + if (OldLatch) { + BasicBlock *NewLatch = L->getLoopLatch(); + if (NewLatch != OldLatch) { + MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop"); + NewLatch->getTerminator()->setMetadata("llvm.loop", MD); + OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); + } + } + return NewBB; } diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 510c033f64743..460ba9e97fc6e 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -84,7 +84,7 @@ struct FixIrreducible : public FunctionPass { initializeFixIrreduciblePass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addRequired(); @@ -93,7 +93,7 @@ struct FixIrreducible : public FunctionPass { AU.addPreserved(); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; }; } // namespace @@ -281,6 +281,9 @@ static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) { LLVM_DEBUG(dbgs() << "Found headers:"); for (auto BB : reverse(Blocks)) { for (const auto P : predecessors(BB)) { + // Skip unreachable predecessors. 
+ if (!DT.isReachableFromEntry(P)) + continue; if (!Blocks.count(P)) { LLVM_DEBUG(dbgs() << " " << BB->getName()); Headers.insert(BB); diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 9d8f59d62d6d0..0c43c1e1ac2a3 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -77,7 +77,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { if (CI.isNoBuiltin() || !CI.getCalledFunction()) return; - const std::string ScalarName = std::string(CI.getCalledFunction()->getName()); + StringRef ScalarName = CI.getCalledFunction()->getName(); + // Nothing to be done if the TLI thinks the function is not // vectorizable. if (!TLI.isFunctionVectorizable(ScalarName)) diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index b0b7ca4847980..5c9cb1245d01c 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1245,7 +1245,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; - if (Align && !Arg.hasPassPointeeByValueAttr() && !Arg.hasNUses(0)) { + if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { DT.recalculate(*CB.getCaller()); DTCalculated = true; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index da40c342af3ac..3d163b8a86bcc 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -453,21 +453,24 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. 
bool llvm::RecursivelyDeleteTriviallyDeadInstructions( - Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) { + Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU, + std::function AboutToDeleteCallback) { Instruction *I = dyn_cast(V); if (!I || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector DeadInsts; DeadInsts.push_back(I); - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); return true; } bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, + std::function AboutToDeleteCallback) { unsigned S = 0, E = DeadInsts.size(), Alive = 0; for (; S != E; ++S) { auto *I = cast(DeadInsts[S]); @@ -478,13 +481,15 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( } if (Alive == E) return false; - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); return true; } void llvm::RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, + std::function AboutToDeleteCallback) { // Process the dead instruction list until empty. while (!DeadInsts.empty()) { Value *V = DeadInsts.pop_back_val(); @@ -498,6 +503,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions( // Don't lose the debug info while deleting the instructions. salvageDebugInfo(*I); + if (AboutToDeleteCallback) + AboutToDeleteCallback(I); + // Null out all of the instruction's operands to see if any operand becomes // dead as we go. 
for (Use &OpU : I->operands()) { diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 8804bba975b6a..d3151bb91047a 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -44,6 +44,8 @@ using namespace llvm; #define DEBUG_TYPE "loop-rotate" +STATISTIC(NumNotRotatedDueToHeaderSize, + "Number of loops not rotated due to the header size"); STATISTIC(NumRotated, "Number of loops rotated"); static cl::opt @@ -320,6 +322,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { << " instructions, which is more than the threshold (" << MaxHeaderSize << " instructions): "; L->dump()); + ++NumNotRotatedDueToHeaderSize; return Rotated; } } @@ -740,12 +743,7 @@ bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, const SimplifyQuery &SQ, bool RotationOnly = true, unsigned Threshold = unsigned(-1), bool IsUtilMode = true) { - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, IsUtilMode); - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - return LR.processLoop(L); } diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 2515b1676cb99..ebcd820a27bda 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -505,6 +506,32 @@ static bool canProfitablyUnrollMultiExitLoop( // know of kinds of multiexit loops that would benefit from unrolling. 
} +// Assign the maximum possible trip count as the back edge weight for the +// remainder loop if the original loop comes with a branch weight. +static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, + Loop *RemainderLoop, + uint64_t UnrollFactor) { + uint64_t TrueWeight, FalseWeight; + BranchInst *LatchBR = + cast(OrigLoop->getLoopLatch()->getTerminator()); + if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() + ? FalseWeight + : TrueWeight; + assert(UnrollFactor > 1); + uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; + BasicBlock *Header = RemainderLoop->getHeader(); + BasicBlock *Latch = RemainderLoop->getLoopLatch(); + auto *RemainderLatchBR = cast(Latch->getTerminator()); + unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(RemainderLatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + } +} + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -788,6 +815,11 @@ bool llvm::UnrollRuntimeLoopRemainder( InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + // Assign the maximum possible trip count as the back edge weight for the + // remainder loop if the original loop comes with a branch weight. + if (remainderLoop && !UnrollRemainder) + updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); + // Insert the cloned blocks into the function. 
F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index c81efd77aa5ff..280d3a996d508 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -205,14 +205,14 @@ struct ValueDFS_Compare { // numbering will say the placed predicaeinfos should go first (IE // LN_beginning), so we won't be in this function. For assumes, we will end // up here, beause we need to order the def we will place relative to the - // assume. So for the purpose of ordering, we pretend the def is the assume - // because that is where we will insert the info. + // assume. So for the purpose of ordering, we pretend the def is right + // after the assume, because that is where we will insert the info. if (!VD.U) { assert(VD.PInfo && "No def, no use, and no predicateinfo should not occur"); assert(isa(VD.PInfo) && "Middle of block should only occur for assumes"); - return cast(VD.PInfo)->AssumeInst; + return cast(VD.PInfo)->AssumeInst->getNextNode(); } return nullptr; } @@ -621,7 +621,9 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, auto *PAssume = dyn_cast(ValInfo); assert(PAssume && "Should not have gotten here without it being an assume"); - IRBuilder<> B(PAssume->AssumeInst); + // Insert the predicate directly after the assume. While it also holds + // directly before it, assume(i1 true) is not a useful fact. 
+ IRBuilder<> B(PAssume->AssumeInst->getNextNode()); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); if (IF->users().empty()) PI.CreatedDeclarations.insert(IF); @@ -820,6 +822,53 @@ PredicateInfo::~PredicateInfo() { } } +Optional PredicateBase::getConstraint() const { + switch (Type) { + case PT_Assume: + case PT_Branch: { + bool TrueEdge = true; + if (auto *PBranch = dyn_cast(this)) + TrueEdge = PBranch->TrueEdge; + + if (Condition == RenamedOp) { + return {{CmpInst::ICMP_EQ, + TrueEdge ? ConstantInt::getTrue(Condition->getType()) + : ConstantInt::getFalse(Condition->getType())}}; + } + + CmpInst *Cmp = dyn_cast(Condition); + assert(Cmp && "Condition should be a CmpInst"); + + CmpInst::Predicate Pred; + Value *OtherOp; + if (Cmp->getOperand(0) == RenamedOp) { + Pred = Cmp->getPredicate(); + OtherOp = Cmp->getOperand(1); + } else if (Cmp->getOperand(1) == RenamedOp) { + Pred = Cmp->getSwappedPredicate(); + OtherOp = Cmp->getOperand(0); + } else { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + // Invert predicate along false edge. + if (!TrueEdge) + Pred = CmpInst::getInversePredicate(Pred); + + return {{Pred, OtherOp}}; + } + case PT_Switch: + if (Condition != RenamedOp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. 
+ return None; + } + + return {{CmpInst::ICMP_EQ, cast(this)->CaseValue}}; + } + llvm_unreachable("Unknown predicate type"); +} + void PredicateInfo::verifyPredicateInfo() const {} char PredicateInfoPrinterLegacyPass::ID = 0; @@ -883,11 +932,11 @@ class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { public: PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {} - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) {} + void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) override {} - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { + void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) override { if (const auto *PI = PredInfo->getPredicateInfoFor(I)) { OS << "; Has predicate info\n"; if (const auto *PB = dyn_cast(PI)) { diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b450d71c996cb..aa015ffe56227 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -147,7 +148,14 @@ STATISTIC( NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); -STATISTIC(NumSinkCommons, +STATISTIC( + NumHoistCommonCode, + "Number of common instruction 'blocks' hoisted up to the begin block"); +STATISTIC(NumHoistCommonInstrs, + "Number of common instructions hoisted up to the begin block"); +STATISTIC(NumSinkCommonCode, + "Number of common instruction 'blocks' sunk down to the end block"); +STATISTIC(NumSinkCommonInstrs, "Number of common instructions sunk down to the 
end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -1285,6 +1293,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, BasicBlock *BIParent = BI->getParent(); bool Changed = false; + + auto _ = make_scope_exit([&]() { + if (Changed) + ++NumHoistCommonCode; + }); + do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1353,6 +1367,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, I2->eraseFromParent(); Changed = true; } + ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; @@ -1407,6 +1422,8 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, I2->replaceAllUsesWith(NT); NT->takeName(I1); } + Changed = true; + ++NumHoistCommonInstrs; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. @@ -1453,7 +1470,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, AddPredecessorToBlock(Succ, BIParent, BB1); EraseTerminatorAndDCECond(BI); - return true; + return Changed; } // Check lifetime markers. @@ -1800,7 +1817,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { if (UnconditionalPreds.size() < 2) return false; - bool Changed = false; // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each // block can be sunk, those instructions are added to ValuesToSink and we @@ -1820,6 +1836,12 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { --LRI; } + // If no instructions can be sunk, early-return. 
+ if (ScanIdx == 0) + return false; + + bool Changed = false; + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { unsigned NumPHIdValues = 0; for (auto *I : *LRI) @@ -1834,7 +1856,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { return NumPHIInsts <= 1; }; - if (ScanIdx > 0 && Cond) { + if (Cond) { // Check if we would actually sink anything first! This mutates the CFG and // adds an extra block. The goal in doing this is to allow instructions that // couldn't be sunk before to be sunk - obviously, speculatable instructions @@ -1875,7 +1897,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { // sink presuming a later value will also be sunk, but stop half way through // and never actually sink it which means we produce more PHIs than intended. // This is unlikely in practice though. - for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { + unsigned SinkIdx = 0; + for (; SinkIdx != ScanIdx; ++SinkIdx) { LLVM_DEBUG(dbgs() << "SINK: Sink: " << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); @@ -1890,11 +1913,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { break; } - if (!sinkLastInstruction(UnconditionalPreds)) - return Changed; - NumSinkCommons++; + if (!sinkLastInstruction(UnconditionalPreds)) { + LLVM_DEBUG( + dbgs() + << "SINK: stopping here, failed to actually sink instruction!\n"); + break; + } + + NumSinkCommonInstrs++; Changed = true; } + if (SinkIdx != 0) + ++NumSinkCommonCode; return Changed; } @@ -2374,11 +2404,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, int BudgetRemaining = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); + Changed = true; continue; } @@ -2386,7 +2418,7 @@ static bool 
FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, BudgetRemaining, TTI)) - return false; + return Changed; } // If we folded the first phi, PN dangles at this point. Refresh it. If @@ -2413,7 +2445,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, isa(IfCond)) && !CanHoistNotFromBothValues(PN->getIncomingValue(0), PN->getIncomingValue(1))) - return false; + return Changed; // If all PHI nodes are promotable, check to make sure that all instructions // in the predecessor blocks can be promoted as well. If not, we won't be able @@ -2431,7 +2463,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return false; + return Changed; } } @@ -2444,7 +2476,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return false; + return Changed; } } assert(DomBlock && "Failed to find root DomBlock"); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6ad8bc6e09426..cfcc3454a2102 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1448,7 +1448,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B) { /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x). 
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); + AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); bool Ignored; @@ -1615,7 +1615,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, /// Use square root in place of pow(x, +/-0.5). Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); + AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); @@ -1785,6 +1785,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Function *Callee = CI->getCalledFunction(); + AttributeList Attrs; // Attributes are only meaningful on the original call StringRef Name = Callee->getName(); Value *Ret = nullptr; if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && @@ -1801,7 +1802,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { if (Value *Exp = getIntToFPVal(Op, B)) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, - B, CI->getCalledFunction()->getAttributes()); + B, Attrs); } return Ret; @@ -1836,7 +1837,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Function *LogFn = Log->getCalledFunction(); - AttributeList Attrs = LogFn->getAttributes(); + AttributeList Attrs; // Attributes are only meaningful on the original call StringRef LogNm = LogFn->getName(); Intrinsic::ID LogID = 
LogFn->getIntrinsicID(); Module *Mod = Log->getModule(); diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 8cdb4d670ac02..b10deee3907c7 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -33,7 +33,7 @@ struct UnifyLoopExits : public FunctionPass { initializeUnifyLoopExitsPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addRequired(); @@ -42,7 +42,7 @@ struct UnifyLoopExits : public FunctionPass { AU.addPreserved(); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; }; } // namespace diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp index c9839121e7f64..6ff08cd287124 100644 --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -403,21 +403,17 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (Offset == -1) return Offset; - // Don't coerce non-integral pointers to integers or vice versa, and the - // memtransfer is implicitly a raw byte code - if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) - // TODO: Can allow nullptrs from constant zeros - return -1; - unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. 
- Src = - ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); - Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, - OffsetCst); + if (Offset) { + Src = ConstantExpr::getBitCast(Src, + Type::getInt8PtrTy(Src->getContext(), AS)); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), + Src, OffsetCst); + } Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL)) return Offset; @@ -587,16 +583,18 @@ T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset, // Otherwise, this is a memcpy/memmove from a constant global. MemTransferInst *MTI = cast(SrcInst); Constant *Src = cast(MTI->getSource()); - unsigned AS = Src->getType()->getPointerAddressSpace(); + unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. 
- Src = - ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); - Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, - OffsetCst); + if (Offset) { + Src = ConstantExpr::getBitCast(Src, + Type::getInt8PtrTy(Src->getContext(), AS)); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), + Src, OffsetCst); + } Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 10e690d56ffd1..7f13a689cedbd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4213,26 +4213,68 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, case InductionDescriptor::IK_PtrInduction: { // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); - // This is the normalized GEP that starts counting at zero. - Value *PtrInd = Induction; - PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType()); - // Determine the number of scalars we need to generate for each unroll - // iteration. If the instruction is uniform, we only need to generate the - // first lane. Otherwise, we generate all VF values. - unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF; - // These are the scalar results. Notice that we don't generate vector GEPs - // because scalar GEPs result in better code. 
- for (unsigned Part = 0; Part < UF; ++Part) { - for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); - Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = - emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); - SclrGep->setName("next.gep"); - VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); + + if (Cost->isScalarAfterVectorization(P, VF)) { + // This is the normalized GEP that starts counting at zero. + Value *PtrInd = + Builder.CreateSExtOrTrunc(Induction, II.getStep()->getType()); + // Determine the number of scalars we need to generate for each unroll + // iteration. If the instruction is uniform, we only need to generate the + // first lane. Otherwise, we generate all VF values. + unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF; + for (unsigned Part = 0; Part < UF; ++Part) { + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); + Value *SclrGep = + emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); + SclrGep->setName("next.gep"); + VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); + } } + return; + } + assert(isa(II.getStep()) && + "Induction step not a SCEV constant!"); + Type *PhiType = II.getStep()->getType(); + + // Build a pointer phi + Value *ScalarStartValue = II.getStartValue(); + Type *ScStValueType = ScalarStartValue->getType(); + PHINode *NewPointerPhi = + PHINode::Create(ScStValueType, 2, "pointer.phi", Induction); + NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader); + + // A pointer induction, performed by using a gep + BasicBlock *LoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); + Instruction *InductionLoc = LoopLatch->getTerminator(); + const SCEV *ScalarStep = II.getStep(); + SCEVExpander Exp(*PSE.getSE(), DL, "induction"); + Value *ScalarStepValue = 
+ Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); + Value *InductionGEP = GetElementPtrInst::Create( + ScStValueType->getPointerElementType(), NewPointerPhi, + Builder.CreateMul(ScalarStepValue, ConstantInt::get(PhiType, VF * UF)), + "ptr.ind", InductionLoc); + NewPointerPhi->addIncoming(InductionGEP, LoopLatch); + + // Create UF many actual address geps that use the pointer + // phi as base and a vectorized version of the step value + // () as offset. + for (unsigned Part = 0; Part < UF; ++Part) { + SmallVector Indices; + // Create a vector of consecutive numbers from zero to VF. + for (unsigned i = 0; i < VF; ++i) + Indices.push_back(ConstantInt::get(PhiType, i + Part * VF)); + Constant *StartOffset = ConstantVector::get(Indices); + + Value *GEP = Builder.CreateGEP( + ScStValueType->getPointerElementType(), NewPointerPhi, + Builder.CreateMul(StartOffset, + Builder.CreateVectorSplat(VF, ScalarStepValue), + "vector.gep")); + VectorLoopValueMap.setVectorValue(P, Part, GEP); } - return; } } } @@ -4468,6 +4510,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // accesses that will remain scalar. SmallSetVector ScalarPtrs; SmallPtrSet PossibleNonScalarPtrs; + auto *Latch = TheLoop->getLoopLatch(); // A helper that returns true if the use of Ptr by MemAccess will be scalar. // The pointer operands of loads and stores will be scalar as long as the @@ -4493,11 +4536,33 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { !TheLoop->isLoopInvariant(V); }; - // A helper that evaluates a memory access's use of a pointer. If the use - // will be a scalar use, and the pointer is only used by memory accesses, we - // place the pointer in ScalarPtrs. Otherwise, the pointer is placed in - // PossibleNonScalarPtrs. 
+ auto isScalarPtrInduction = [&](Instruction *MemAccess, Value *Ptr) { + if (!isa(Ptr) || + !Legal->getInductionVars().count(cast(Ptr))) + return false; + auto &Induction = Legal->getInductionVars()[cast(Ptr)]; + if (Induction.getKind() != InductionDescriptor::IK_PtrInduction) + return false; + return isScalarUse(MemAccess, Ptr); + }; + + // A helper that evaluates a memory access's use of a pointer. If the + // pointer is actually the pointer induction of a loop, it is being + // inserted into Worklist. If the use will be a scalar use, and the + // pointer is only used by memory accesses, we place the pointer in + // ScalarPtrs. Otherwise, the pointer is placed in PossibleNonScalarPtrs. auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) { + if (isScalarPtrInduction(MemAccess, Ptr)) { + Worklist.insert(cast(Ptr)); + Instruction *Update = cast( + cast(Ptr)->getIncomingValueForBlock(Latch)); + Worklist.insert(Update); + LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Ptr + << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Update + << "\n"); + return; + } // We only care about bitcast and getelementptr instructions contained in // the loop. if (!isLoopVaryingBitCastOrGEP(Ptr)) @@ -4521,10 +4586,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { }; // We seed the scalars analysis with three classes of instructions: (1) - // instructions marked uniform-after-vectorization, (2) bitcast and - // getelementptr instructions used by memory accesses requiring a scalar use, - // and (3) pointer induction variables and their update instructions (we - // currently only scalarize these). + // instructions marked uniform-after-vectorization and (2) bitcast, + // getelementptr and (pointer) phi instructions used by memory accesses + // requiring a scalar use. // // (1) Add to the worklist all instructions that have been identified as // uniform-after-vectorization. 
@@ -4550,24 +4614,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { Worklist.insert(I); } - // (3) Add to the worklist all pointer induction variables and their update - // instructions. - // - // TODO: Once we are able to vectorize pointer induction variables we should - // no longer insert them into the worklist here. - auto *Latch = TheLoop->getLoopLatch(); - for (auto &Induction : Legal->getInductionVars()) { - auto *Ind = Induction.first; - auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); - if (Induction.second.getKind() != InductionDescriptor::IK_PtrInduction) - continue; - Worklist.insert(Ind); - Worklist.insert(IndUpdate); - LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); - LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate - << "\n"); - } - // Insert the forced scalars. // FIXME: Currently widenPHIInstruction() often creates a dead vector // induction variable when the PHI user is scalarized. @@ -4603,14 +4649,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { auto *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); - // We already considered pointer induction variables, so there's no reason - // to look at their users again. - // - // TODO: Once we are able to vectorize pointer induction variables we - // should no longer skip over them here. - if (Induction.second.getKind() == InductionDescriptor::IK_PtrInduction) - continue; - // If tail-folding is applied, the primary induction variable will be used // to feed a vector compare. if (Ind == Legal->getPrimaryInduction() && foldTailByMasking()) @@ -4949,8 +4987,14 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() { return true; } - assert(Legal->getLAI()->getSymbolicStrides().empty() && - "Specializing for stride == 1 under -Os/-Oz"); + // FIXME: Avoid specializing for stride==1 instead of bailing out. 
+ if (!Legal->getLAI()->getSymbolicStrides().empty()) { + reportVectorizationFailure("Runtime stride check for small trip count", + "runtime stride == 1 checks needed. Enable vectorization of " + "this loop without such check by compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; + } return false; } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d4b16fac985d9..5fb8ad56d8b3e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3022,19 +3022,24 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, return; } case Instruction::Call: { - // Check if the calls are all to the same vectorizable intrinsic. + // Check if the calls are all to the same vectorizable intrinsic or + // library function. CallInst *CI = cast(VL0); - // Check if this is an Intrinsic call or something that can be - // represented by an intrinsic call Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - if (!isTriviallyVectorizable(ID)) { + + VFShape Shape = VFShape::get( + *CI, {static_cast(VL.size()), false /*Scalable*/}, + false /*HasGlobalPred*/); + Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape); + + if (!VecFunc && !isTriviallyVectorizable(ID)) { BS.cancelScheduling(VL, VL0); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } - Function *Int = CI->getCalledFunction(); + Function *F = CI->getCalledFunction(); unsigned NumArgs = CI->getNumArgOperands(); SmallVector ScalarArgs(NumArgs, nullptr); for (unsigned j = 0; j != NumArgs; ++j) @@ -3042,8 +3047,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, ScalarArgs[j] = CI->getArgOperand(j); for (Value *V : VL) { CallInst *CI2 = dyn_cast(V); - if (!CI2 || CI2->getCalledFunction() != Int || + if (!CI2 || CI2->getCalledFunction() != F || 
getVectorIntrinsicIDForCall(CI2, TLI) != ID || + (VecFunc && + VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL, VL0); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, @@ -4507,7 +4514,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI); - bool UseIntrinsic = VecCallCosts.first <= VecCallCosts.second; + bool UseIntrinsic = ID != Intrinsic::not_intrinsic && + VecCallCosts.first <= VecCallCosts.second; Value *ScalarArg = nullptr; std::vector OpVecs; @@ -4527,15 +4535,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { OpVecs.push_back(OpVec); } - Module *M = F->getParent(); - Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())}; - Function *CF = Intrinsic::getDeclaration(M, ID, Tys); - + Function *CF; if (!UseIntrinsic) { VFShape Shape = VFShape::get( *CI, {static_cast(VecTy->getNumElements()), false}, false /*HasGlobalPred*/); CF = VFDatabase(*CI).getVectorizedFunction(Shape); + } else { + Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())}; + CF = Intrinsic::getDeclaration(F->getParent(), ID, Tys); } SmallVector OpBundles; @@ -7399,7 +7407,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { while (SameTypeIt != E && (*SameTypeIt)->getType() == EltTy && - (SameTypeIt - IncIt) < MaxNumElts) { + static_cast(SameTypeIt - IncIt) < MaxNumElts) { VisitedInstrs.insert(*SameTypeIt); ++SameTypeIt; } diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index d56f7af583eda..0f29e24a26eb4 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -413,7 +413,7 @@ else() # if this is included from LLVM's CMake # runtime_register_target(target) # Utility function to register external runtime target. 
function(runtime_register_target name target) - cmake_parse_arguments(ARG "" "" "DEPENDS" ${ARGN}) + cmake_parse_arguments(ARG "" "" "DEPENDS;CMAKE_ARGS" ${ARGN}) include(${LLVM_BINARY_DIR}/runtimes/${name}/Components.cmake OPTIONAL) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/${name}/Components.cmake) @@ -457,6 +457,7 @@ else() # if this is included from LLVM's CMake set(test_targets "${test_targets}" PARENT_SCOPE) endif() + set(${name}_extra_args ${ARG_CMAKE_ARGS}) get_cmake_property(variableNames VARIABLES) foreach(variableName ${variableNames}) string(FIND "${variableName}" "RUNTIMES_${target}_" out) diff --git a/llvm/test/Analysis/BasicAA/recphi.ll b/llvm/test/Analysis/BasicAA/recphi.ll index 130058c745604..dfc88937bf699 100644 --- a/llvm/test/Analysis/BasicAA/recphi.ll +++ b/llvm/test/Analysis/BasicAA/recphi.ll @@ -83,3 +83,109 @@ if.then: ; preds = %f.exit if.end: ; preds = %f.exit ret i32 0 } + +; CHECK-LABEL: Function: reverse: 6 pointers, 0 call sites +; CHECK: MustAlias: [10 x i32]* %tab, i8* %0 +; CHECK: MustAlias: [10 x i32]* %tab, i32* %arrayidx +; CHECK: MustAlias: i32* %arrayidx, i8* %0 +; CHECK: PartialAlias: [10 x i32]* %tab, i32* %arrayidx1 +; CHECK: NoAlias: i32* %arrayidx1, i8* %0 +; CHECK: NoAlias: i32* %arrayidx, i32* %arrayidx1 +; CHECK: MayAlias: [10 x i32]* %tab, i32* %p.addr.05.i +; CHECK: MayAlias: i32* %p.addr.05.i, i8* %0 +; CHECK: MayAlias: i32* %arrayidx, i32* %p.addr.05.i +; CHECK: MayAlias: i32* %arrayidx1, i32* %p.addr.05.i +; CHECK: MayAlias: [10 x i32]* %tab, i32* %incdec.ptr.i +; CHECK: MayAlias: i32* %incdec.ptr.i, i8* %0 +; CHECK: MayAlias: i32* %arrayidx, i32* %incdec.ptr.i +; CHECK: MayAlias: i32* %arrayidx1, i32* %incdec.ptr.i +; CHECK: NoAlias: i32* %incdec.ptr.i, i32* %p.addr.05.i +define i32 @reverse() nounwind { +entry: + %tab = alloca [10 x i32], align 4 + %0 = bitcast [10 x i32]* %tab to i8* + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %tab, i32 0, i32 0 
+ store i32 0, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* %tab, i32 0, i32 9 + store i32 0, i32* %arrayidx1, align 4 + %1 = add i32 1, 1 + %cmp4.i = icmp slt i32 %1, 2 + br i1 %cmp4.i, label %while.body.i, label %f.exit + +while.body.i: ; preds = %while.body.i, %entry + %2 = phi i32 [ 1, %while.body.i ], [ %1, %entry ] + %foo.06.i = phi i32 [ %sub.i, %while.body.i ], [ 2, %entry ] + %p.addr.05.i = phi i32* [ %incdec.ptr.i, %while.body.i ], [ %arrayidx1, %entry ] + %sub.i = sub nsw i32 %foo.06.i, %2 + %incdec.ptr.i = getelementptr inbounds i32, i32* %p.addr.05.i, i32 -1 + store i32 %sub.i, i32* %p.addr.05.i, align 4 + %cmp.i = icmp sgt i32 %sub.i, 1 + br i1 %cmp.i, label %while.body.i, label %f.exit + +f.exit: ; preds = %entry, %while.body.i + %3 = load i32, i32* %arrayidx1, align 4 + %cmp = icmp eq i32 %3, 2 + %4 = load i32, i32* %arrayidx, align 4 + %cmp4 = icmp eq i32 %4, 1 + %or.cond = and i1 %cmp, %cmp4 + br i1 %or.cond, label %if.end, label %if.then + +if.then: ; preds = %f.exit + unreachable + +if.end: ; preds = %f.exit + ret i32 0 +} + +; CHECK-LABEL: Function: negative: 6 pointers, 1 call sites +; CHECK: NoAlias: [3 x i16]* %int_arr.10, i16** %argv.6.par +; CHECK: NoAlias: i16* %_tmp1, i16** %argv.6.par +; CHECK: PartialAlias: [3 x i16]* %int_arr.10, i16* %_tmp1 +; CHECK: MayAlias: i16* %ls1.9.0, i16** %argv.6.par +; CHECK: MayAlias: [3 x i16]* %int_arr.10, i16* %ls1.9.0 +; CHECK: MayAlias: i16* %_tmp1, i16* %ls1.9.0 +; CHECK: MayAlias: i16* %_tmp7, i16** %argv.6.par +; CHECK: MayAlias: [3 x i16]* %int_arr.10, i16* %_tmp7 +; CHECK: MayAlias: i16* %_tmp1, i16* %_tmp7 +; CHECK: NoAlias: i16* %_tmp7, i16* %ls1.9.0 +; CHECK: NoAlias: i16* %_tmp11, i16** %argv.6.par +; CHECK: PartialAlias: [3 x i16]* %int_arr.10, i16* %_tmp11 +; CHECK: NoAlias: i16* %_tmp1, i16* %_tmp11 +; CHECK: MayAlias: i16* %_tmp11, i16* %ls1.9.0 +; CHECK: MayAlias: i16* %_tmp11, i16* %_tmp7 +; CHECK: Both ModRef: Ptr: i16** %argv.6.par <-> %_tmp16 
= call i16 @call(i32 %_tmp13) +; CHECK: NoModRef: Ptr: [3 x i16]* %int_arr.10 <-> %_tmp16 = call i16 @call(i32 %_tmp13) +; CHECK: NoModRef: Ptr: i16* %_tmp1 <-> %_tmp16 = call i16 @call(i32 %_tmp13) +; CHECK: Both ModRef: Ptr: i16* %ls1.9.0 <-> %_tmp16 = call i16 @call(i32 %_tmp13) +; CHECK: Both ModRef: Ptr: i16* %_tmp7 <-> %_tmp16 = call i16 @call(i32 %_tmp13) +; CHECK: NoModRef: Ptr: i16* %_tmp11 <-> %_tmp16 = call i16 @call(i32 %_tmp13) +define i16 @negative(i16 %argc.5.par, i16** nocapture readnone %argv.6.par) { + %int_arr.10 = alloca [3 x i16], align 1 + %_tmp1 = getelementptr inbounds [3 x i16], [3 x i16]* %int_arr.10, i16 0, i16 2 + br label %bb1 + +bb1: ; preds = %bb1, %0 + %i.7.0 = phi i16 [ 2, %0 ], [ %_tmp5, %bb1 ] + %ls1.9.0 = phi i16* [ %_tmp1, %0 ], [ %_tmp7, %bb1 ] + store i16 %i.7.0, i16* %ls1.9.0, align 1 + %_tmp5 = add nsw i16 %i.7.0, -1 + %_tmp7 = getelementptr i16, i16* %ls1.9.0, i16 -1 + %_tmp9 = icmp sgt i16 %i.7.0, 0 + br i1 %_tmp9, label %bb1, label %bb3 + +bb3: ; preds = %bb1 + %_tmp11 = getelementptr inbounds [3 x i16], [3 x i16]* %int_arr.10, i16 0, i16 1 + %_tmp12 = load i16, i16* %_tmp11, align 1 + %_tmp13 = sext i16 %_tmp12 to i32 + %_tmp16 = call i16 @call(i32 %_tmp13) + %_tmp18.not = icmp eq i16 %_tmp12, 1 + br i1 %_tmp18.not, label %bb5, label %bb4 + +bb4: ; preds = %bb3 + ret i16 1 + +bb5: ; preds = %bb3, %bb4 + ret i16 0 +} + +declare i16 @call(i32) diff --git a/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll b/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll new file mode 100644 index 0000000000000..8964ca1efd866 --- /dev/null +++ b/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s +; CHECK: Call graph node <><<{{.*}}>> #uses=0 +; CHECK-NEXT: CS<{{.*}}> calls function 'f' +; CHECK-NEXT: CS<{{.*}}> calls function '__kmpc_fork_call' +; CHECK-EMPTY: + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@0 = private unnamed_addr 
constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @f() { +entry: + br label %omp_parallel + +omp_parallel: ; preds = %entry + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @f..omp_par to void (i32*, i32*, ...)*)) + br label %omp.par.exit.split + +omp.par.exit.split: ; preds = %omp_parallel + ret void +} + +; Function Attrs: norecurse nounwind +define internal void @f..omp_par(i32* noalias %tid.addr, i32* noalias %zero.addr) { +omp.par.entry: + %tid.addr.local = alloca i32, align 4 + %0 = load i32, i32* %tid.addr, align 4 + store i32 %0, i32* %tid.addr.local, align 4 + %tid = load i32, i32* %tid.addr.local, align 4 + br label %omp.par.region + +omp.par.exit.split.exitStub: ; preds = %omp.par.outlined.exit + ret void + +omp.par.region: ; preds = %omp.par.entry + br label %omp.par.pre_finalize + +omp.par.pre_finalize: ; preds = %omp.par.region + br label %omp.par.outlined.exit + +omp.par.outlined.exit: ; preds = %omp.par.pre_finalize + br label %omp.par.exit.split.exitStub +} + +; Function Attrs: nounwind +declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
#2 + +!2 = !{!3} +!3 = !{i64 2, i64 -1, i64 -1, i1 true} diff --git a/llvm/test/Analysis/ConstantFolding/allones.ll b/llvm/test/Analysis/ConstantFolding/allones.ll new file mode 100644 index 0000000000000..1315b3628475e --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/allones.ll @@ -0,0 +1,46 @@ +; RUN: opt -early-cse -S -o - %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64-ni:2" +target triple = "armv7-unknown-linux-gnueabi" + +%struct.anon = type { i32 } + +@onesstruct = private constant %struct.anon { i32 -1 }, align 4 + +define i32 @allones_struct() { +; CHECK-LABEL: @allones_struct() +; CHECK-NEXT: %1 = load [1 x i32], [1 x i32]* bitcast (%struct.anon* @onesstruct to [1 x i32]*), align 4 +; CHECK-NEXT: %2 = extractvalue [1 x i32] %1, 0 +; CHECK-NEXT: ret i32 %2 + %1 = load [1 x i32], [1 x i32]* bitcast (%struct.anon* @onesstruct to [1 x i32]*), align 4 + %2 = extractvalue [1 x i32] %1, 0 + ret i32 %2 +} + +define i32 @allones_int() { +; CHECK-LABEL: @allones_int() +; CHECK-NEXT: ret i32 -1 + %1 = load i32, i32* bitcast (%struct.anon* @onesstruct to i32*), align 4 + ret i32 %1 +} + +define i32* @allones_ptr() { +; CHECK-LABEL: @allones_ptr() +; CHECK-NEXT: ret i32* inttoptr (i32 -1 to i32*) + %1 = load i32*, i32** bitcast (%struct.anon* @onesstruct to i32**), align 4 + ret i32* %1 +} + +define i32 addrspace(1)* @allones_ptr1() { +; CHECK-LABEL: @allones_ptr1() +; CHECK-NEXT: ret i32 addrspace(1)* inttoptr (i32 -1 to i32 addrspace(1)*) + %1 = load i32 addrspace(1)*, i32 addrspace(1)** bitcast (%struct.anon* @onesstruct to i32 addrspace(1)**), align 4 + ret i32 addrspace(1)* %1 +} + +define i32 addrspace(2)* @allones_ptr2() { +; CHECK-LABEL: @allones_ptr2() +; CHECK-NEXT: %1 = load i32 addrspace(2)*, i32 addrspace(2)** bitcast (%struct.anon* @onesstruct to i32 addrspace(2)**), align 4 +; CHECK-NEXT: ret i32 addrspace(2)* %1 + %1 = load i32 addrspace(2)*, i32 addrspace(2)** bitcast (%struct.anon* @onesstruct to i32 
addrspace(2)**), align 4 + ret i32 addrspace(2)* %1 +} diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll new file mode 100644 index 0000000000000..c9695061e7f12 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll @@ -0,0 +1,12 @@ +; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -cost-model -analyze < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it. +; WARN-NOT: warning + +; CHECK: Found an estimated cost of 0 for instruction: %b = bitcast %a to + +define @foo( %a, i32 %x) { + %b = bitcast %a to + ret %b +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll new file mode 100644 index 0000000000000..7af1e48125d70 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll @@ -0,0 +1,48 @@ +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s + +target triple = "amdgcn--" + +; ALL-LABEL: 'fmul_fadd_f32': +; FUSED: estimated cost of 0 for instruction: %mul = fmul float +; SLOW: estimated cost of 1 for instruction: %mul = fmul float +; ALL: estimated cost of 1 for instruction: %add = fadd float +define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 { + 
%mul = fmul float %r0, %r1 + %add = fadd float %mul, %r2 + ret float %add +} + +; ALL-LABEL: 'fmul_fadd_v2f32': +; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float> +; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float> +; ALL: estimated cost of 2 for instruction: %add = fadd <2 x float> +define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 { + %mul = fmul <2 x float> %r0, %r1 + %add = fadd <2 x float> %mul, %r2 + ret <2 x float> %add +} + +; ALL-LABEL: 'fmul_fsub_f32': +; FUSED: estimated cost of 0 for instruction: %mul = fmul float +; SLOW: estimated cost of 1 for instruction: %mul = fmul float +; ALL: estimated cost of 1 for instruction: %sub = fsub float +define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 { + %mul = fmul float %r0, %r1 + %sub = fsub float %mul, %r2 + ret float %sub +} + +; ALL-LABEL: 'fmul_fsub_v2f32': +; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float> +; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float> +; ALL: estimated cost of 2 for instruction: %sub = fsub <2 x float> +define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 { + %mul = fmul <2 x float> %r0, %r1 + %sub = fsub <2 x float> %mul, %r2 + ret <2 x float> %sub +} + +attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/IVUsers/quadradic-exit-value.ll b/llvm/test/Analysis/IVUsers/quadradic-exit-value.ll index 1597bfa8a3745..e01c066cbd97a 100644 --- a/llvm/test/Analysis/IVUsers/quadradic-exit-value.ll +++ b/llvm/test/Analysis/IVUsers/quadradic-exit-value.ll @@ -6,7 +6,7 @@ ; checks at that point. ; RUN: opt < %s -analyze -iv-users | FileCheck %s --check-prefixes=CHECK,CHECK-NO-LCSSA -; RUN: opt < %s -disable-output -passes='print' 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output -passes='print' 2>&1 | FileCheck %s ; Provide legal integer types. 
target datalayout = "n8:16:32:64" diff --git a/llvm/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/llvm/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll index 7380da3ae7f87..e927ba1bccbae 100644 --- a/llvm/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll +++ b/llvm/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR1533 @array = weak global [101 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1] diff --git a/llvm/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/llvm/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll index 9e19ccab6eb1c..52601823de284 100644 --- a/llvm/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll +++ b/llvm/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; PR1597 ; CHECK: Loop %bb: backedge-taken count is (-1 + (-1 * %x) + %y) diff --git a/llvm/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/llvm/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll index b65a525024474..f766674460dfc 100644 --- a/llvm/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll +++ b/llvm/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 
2>&1 | FileCheck %s ; PR1706 ; CHECK: backedge-taken count is 13 diff --git a/llvm/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll b/llvm/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll index c12721d82f011..ce6d298443f24 100644 --- a/llvm/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll +++ b/llvm/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR1810 define void @fun() { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/llvm/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll index 6ebfa61de41d1..33fc682d4b1fa 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; CHECK: Loop %header: backedge-taken count is (0 smax %n) diff --git a/llvm/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/llvm/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll index ce0329d9ce8c9..cec11a8f9fb8a 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; PR2002 ; CHECK: Loop %loop: backedge-taken count is (100 + (-100 smax %n)) diff --git a/llvm/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/llvm/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll index 
527fd273cd25b..34c1f34f11fba 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR2003 ; CHECK: umax diff --git a/llvm/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/llvm/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll index 9a05d88c4ce7c..2ab3f6d7004d4 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2364 ; CHECK: backedge-taken count is 61 diff --git a/llvm/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll b/llvm/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll index d503329292c7d..09b924c726a5a 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>/dev/null +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>/dev/null +; RUN: opt < %s -disable-output "-passes=print" 2>/dev/null ; PR2433 define i32 @main1(i32 %argc, i8** %argv) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll index 7e42530798f62..a0bb0978f65d7 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR2261 ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'foo' diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll index c804bd905510c..964300fffe4eb 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR2070 ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'a' diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll index ad34f6cedf61d..808f315173f3e 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2088 ; CHECK: Unpredictable diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll index 82b9d560425e0..c9fe1bbf6fc96 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2088 ; CHECK: backedge-taken count is 113 diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll index 75bd634b3ef12..d7c7e4ef12d7d 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2607 define i32 @_Z1aj(i32 %j) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll index d930706d7d2ac..1deb654f79de6 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2607 define i32 @b(i32 %x, i32 %y) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/llvm/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll index 3b31d797cf488..216988f9f8d2b 100644 --- 
a/llvm/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2621 define i32 @a() nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll b/llvm/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll index b296a19716c8b..edb083102aa45 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR2621 define i32 @a() nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll b/llvm/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll index 7722122117dc6..0f6eeb6c3ff60 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution +; RUN: opt < %s -disable-output -scalar-evolution ; PR1827 declare void @use(i32) diff --git a/llvm/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll b/llvm/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll index 2e2aabc475a0c..bcb6559d77a9c 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution +; RUN: opt < %s -disable-output -scalar-evolution ; PR2602 define i32 @a() nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/llvm/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll index 84561c5c6dc93..364f42ac0f91b 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: Loop %bb: backedge-taken count is (7 + (-1 * %argc)) diff --git a/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll index 7acf90c7330cb..d780feb1251e3 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: Loop %bb: backedge-taken count is ((-5 + %x) /u 3) ; CHECK: Loop %bb: max backedge-taken count is 1431655764 diff --git a/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll index 2b2296a3a24fa..cece09305e0c6 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt
< %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: Loop %bb: backedge-taken count is ((999 + (-1 * %x)) /u 3) ; CHECK: Loop %bb: max backedge-taken count is 334 diff --git a/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll index 0c24ee4eaff1f..abe1272b35e87 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: backedge-taken count is 255 diff --git a/llvm/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll b/llvm/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll index 12254e37dcc77..0c90b60859c0f 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: @f ; CHECK: Loop %bb16.preheader: backedge-taken count is (-1 + %c.idx.val) diff --git a/llvm/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/llvm/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll index 95aa1fc85e20c..9e24519f2d1ae 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; XFAIL: * ; CHECK: (((-1 * %i0) + (100005 smax %i0)) /u 5) diff --git 
a/llvm/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/llvm/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll index 70588bc0574e8..97b686c676aa2 100644 --- a/llvm/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll +++ b/llvm/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: /u 5 diff --git a/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll index ebcecbf74294d..75b5162876361 100644 --- a/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll +++ b/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR3275 ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'func_15' diff --git a/llvm/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/llvm/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll index a845465a26b51..16bb84e4b5040 100644 --- a/llvm/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll +++ b/llvm/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test1' ; CHECK-NOT: (trunc i{{.*}}ext diff --git a/llvm/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/llvm/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll index 
4f6b90b39f6f2..487309bebd850 100644 --- a/llvm/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll +++ b/llvm/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR3171 ; CHECK: count is 2 diff --git a/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll b/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll index d18bdaf7cba73..6797d15dcb5af 100644 --- a/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll +++ b/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution +; RUN: opt < %s -disable-output -scalar-evolution ; PR4501 define void @test() { diff --git a/llvm/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll b/llvm/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll index 5a02398104186..2e28da170be54 100644 --- a/llvm/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll +++ b/llvm/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll @@ -1,4 +1,5 @@ -; RUN: opt -indvars -scalar-evolution -analyze < %s | FileCheck %s +; RUN: opt -indvars -scalar-evolution -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt "-passes=loop(indvars),print" -disable-output < %s 2>&1 | FileCheck %s ; This test checks if the SCEV analysis is printed out at all. ; It failed once as the RequiredTransitive option was not implemented ; correctly. 
diff --git a/llvm/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll b/llvm/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll index 4f126fbf6b3e7..53d24eb2ee140 100644 --- a/llvm/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll +++ b/llvm/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll @@ -1,5 +1,6 @@ -; RUN: opt < %s -analyze -iv-users -; RUN: opt < %s -passes='print' +; RUN: opt < %s -analyze -enable-new-pm=0 -iv-users +; RUN: opt < %s -disable-output -iv-users +; RUN: opt < %s -passes='print' ; PR9633: Tests that SCEV handles the mul.i2 recurrence being folded to ; constant zero. diff --git a/llvm/test/Analysis/ScalarEvolution/2011-10-04-ConstEvolve.ll b/llvm/test/Analysis/ScalarEvolution/2011-10-04-ConstEvolve.ll index 29bb64ad642b9..32a65238db5e9 100644 --- a/llvm/test/Analysis/ScalarEvolution/2011-10-04-ConstEvolve.ll +++ b/llvm/test/Analysis/ScalarEvolution/2011-10-04-ConstEvolve.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; Exercise getConstantEvolvingPHIOperands on an interesting loop. ; This should complete in milliseconds, not minutes. 
diff --git a/llvm/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll b/llvm/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll index 853d43c4f875a..83682627a5445 100644 --- a/llvm/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll +++ b/llvm/test/Analysis/ScalarEvolution/2012-05-18-LoopPredRecurse.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -iv-users -S -disable-output -; RUN: opt < %s -passes='require' -S -disable-output +; RUN: opt < %s -passes='require' -S -disable-output ; ; PR12868: Infinite recursion: ; getUDivExpr()->getZeroExtendExpr()->isLoopBackedgeGuardedBy() diff --git a/llvm/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/llvm/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll index f7ef0ea9e4847..f4175fc7979a1 100644 --- a/llvm/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll +++ b/llvm/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 2>&1 | FileCheck %s ; PR1101 diff --git a/llvm/test/Analysis/ScalarEvolution/ZeroStep.ll b/llvm/test/Analysis/ScalarEvolution/ZeroStep.ll index fc6ed018e9033..9d3b872110014 100644 --- a/llvm/test/Analysis/ScalarEvolution/ZeroStep.ll +++ b/llvm/test/Analysis/ScalarEvolution/ZeroStep.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s -o - -S | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s -o - -S | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s -o - -S 2>&1 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll 
b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll index e164b7fc5e431..93a3bf4d4c378 100644 --- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s ; Reduced from test-suite/MultiSource/Benchmarks/MiBench/office-ispell/correct.c ; getelementptr, obviously, takes pointer as it's base, and returns a pointer. diff --git a/llvm/test/Analysis/ScalarEvolution/add-like-or.ll b/llvm/test/Analysis/ScalarEvolution/add-like-or.ll index c0be633f2eb3c..73a82388d7e4d 100644 --- a/llvm/test/Analysis/ScalarEvolution/add-like-or.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-like-or.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s define i8 @or-of-constant-with-no-common-bits-set(i8 %x, i8 %y) { ; CHECK-LABEL: 'or-of-constant-with-no-common-bits-set' diff --git a/llvm/test/Analysis/ScalarEvolution/and-xor.ll b/llvm/test/Analysis/ScalarEvolution/and-xor.ll index 8217e1a1d91e6..aa57d9d043ba7 100644 --- a/llvm/test/Analysis/ScalarEvolution/and-xor.ll +++ b/llvm/test/Analysis/ScalarEvolution/and-xor.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; CHECK-LABEL: @test1 ; CHECK: --> (zext diff --git 
a/llvm/test/Analysis/ScalarEvolution/avoid-assume-hang.ll b/llvm/test/Analysis/ScalarEvolution/avoid-assume-hang.ll index e2428ed1f73f7..3a5abc46dbe6a 100644 --- a/llvm/test/Analysis/ScalarEvolution/avoid-assume-hang.ll +++ b/llvm/test/Analysis/ScalarEvolution/avoid-assume-hang.ll @@ -1,4 +1,5 @@ -; RUN: opt %s -always-inline | opt -analyze -scalar-evolution +; RUN: opt %s -always-inline | opt -analyze -enable-new-pm=0 -scalar-evolution +; RUN: opt %s -always-inline | opt -disable-output -scalar-evolution ; There was optimization bug in ScalarEvolution, that causes too long ; compute time and stack overflow crash. @@ -136,4 +137,4 @@ loop: exit: ret void -} \ No newline at end of file +} diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll b/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll index 0976ef92985ce..20a42320883d5 100644 --- a/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll +++ b/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution +; RUN: opt < %s -disable-output -scalar-evolution ; PR4537 ; ModuleID = 'b.bc' diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll b/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll index 8e519d409edeb..7f1cc17c924f8 100644 --- a/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll +++ b/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -iv-users -; RUN: opt < %s -passes='require' +; RUN: opt < %s -passes='require' ; PR4538 ; ModuleID = 'bugpoint-reduced-simplified.bc' diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll index a282ee6993f01..f3603de2afadf 100644 --- a/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; CHECK: Loop %bb3: backedge-taken count is (-1 + %n) diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll index 50c30431af585..52fc3285e3b8c 100644 --- a/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution -S | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -S | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -S 2>&1 | FileCheck %s ; Indvars should be able to find the trip count for the bb6 loop ; without using a maximum calculation (icmp, select) because it should diff --git a/llvm/test/Analysis/ScalarEvolution/binomial-explision.ll b/llvm/test/Analysis/ScalarEvolution/binomial-explision.ll index ff27bfcbd764e..7513378bf2a4b 100644 --- a/llvm/test/Analysis/ScalarEvolution/binomial-explision.ll +++ b/llvm/test/Analysis/ScalarEvolution/binomial-explision.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" diff --git a/llvm/test/Analysis/ScalarEvolution/constant_condition.ll b/llvm/test/Analysis/ScalarEvolution/constant_condition.ll index 32ab91b2c857b..f4dee5da32b17 100644 --- a/llvm/test/Analysis/ScalarEvolution/constant_condition.ll +++ b/llvm/test/Analysis/ScalarEvolution/constant_condition.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; 
RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define i32 @branch_true(i32 %x, i32 %y) { ; CHECK-LABEL: Classifying expressions for: @branch_true diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll index 1af50352e3ff5..074d150b1e038 100644 --- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll +++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; This test set ensures that we can correctly operate with recurrencies from ; different loops. diff --git a/llvm/test/Analysis/ScalarEvolution/div-overflow.ll b/llvm/test/Analysis/ScalarEvolution/div-overflow.ll index aca964ae62b6e..f05e87e29f33b 100644 --- a/llvm/test/Analysis/ScalarEvolution/div-overflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/div-overflow.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; CHECK: --> ((-128 * %a) /u -128) diff --git a/llvm/test/Analysis/ScalarEvolution/do-loop.ll b/llvm/test/Analysis/ScalarEvolution/do-loop.ll index e35ea7d57e3a5..1cbd3719ea9f0 100644 --- a/llvm/test/Analysis/ScalarEvolution/do-loop.ll +++ b/llvm/test/Analysis/ScalarEvolution/do-loop.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR1614 ; CHECK: smax diff --git a/llvm/test/Analysis/ScalarEvolution/exact-exit-count-more-precise.ll b/llvm/test/Analysis/ScalarEvolution/exact-exit-count-more-precise.ll index 8b1f878b1c0a7..fa49bf39fae1c 
100644 --- a/llvm/test/Analysis/ScalarEvolution/exact-exit-count-more-precise.ll +++ b/llvm/test/Analysis/ScalarEvolution/exact-exit-count-more-precise.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" %s 2>&1 | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/exact_iter_count.ll b/llvm/test/Analysis/ScalarEvolution/exact_iter_count.ll index 443da146e771d..e4f4ef3147b44 100644 --- a/llvm/test/Analysis/ScalarEvolution/exact_iter_count.ll +++ b/llvm/test/Analysis/ScalarEvolution/exact_iter_count.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; One side exit dominating the latch, exact backedge taken count is known. define void @test_01() { diff --git a/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll b/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll index 16d9cc5443872..83659ceea685c 100644 --- a/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll +++ b/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/exponential-behavior.ll b/llvm/test/Analysis/ScalarEvolution/exponential-behavior.ll index 919521a58b6d1..867a4d08b7762 100644 --- a/llvm/test/Analysis/ScalarEvolution/exponential-behavior.ll +++ b/llvm/test/Analysis/ScalarEvolution/exponential-behavior.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze 
-scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'f': diff --git a/llvm/test/Analysis/ScalarEvolution/extract-highbits-sameconstmask.ll b/llvm/test/Analysis/ScalarEvolution/extract-highbits-sameconstmask.ll index a4df455e2abb7..b6b8777a92d0e 100644 --- a/llvm/test/Analysis/ScalarEvolution/extract-highbits-sameconstmask.ll +++ b/llvm/test/Analysis/ScalarEvolution/extract-highbits-sameconstmask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; The obvious case. define i32 @div(i32 %val) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll b/llvm/test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll index 56d86f0309e9d..d4ab8d8bef1f8 100644 --- a/llvm/test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll +++ b/llvm/test/Analysis/ScalarEvolution/extract-highbits-variablemask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; These testcases aren't *identical* but they have the same/similar meaning. 
diff --git a/llvm/test/Analysis/ScalarEvolution/extract-lowbits-sameconstmask.ll b/llvm/test/Analysis/ScalarEvolution/extract-lowbits-sameconstmask.ll index cb7af18a8c1fa..079298630ea1a 100644 --- a/llvm/test/Analysis/ScalarEvolution/extract-lowbits-sameconstmask.ll +++ b/llvm/test/Analysis/ScalarEvolution/extract-lowbits-sameconstmask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; The obvious case. define i32 @mul(i32 %val) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll b/llvm/test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll index cd73cf366b8a0..88ccd1a63141b 100644 --- a/llvm/test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll +++ b/llvm/test/Analysis/ScalarEvolution/extract-lowbits-variablemask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; These testcases aren't *identical* but they have the same/similar meaning. 
diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll index d2f9f2b979509..8c1a2e76315af 100644 --- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll +++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; Make sure poison value tracking works in the presence of @llvm.dbg ; intrinsics. Unfortunately, I was not able to reduce this file diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll index 122d7dc5f1718..49f8af1554bda 100644 --- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll +++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s ; Positive and negative tests for inferring flags like nsw from ; reasoning about how a poison value from overflow would trigger diff --git a/llvm/test/Analysis/ScalarEvolution/flattened-0.ll b/llvm/test/Analysis/ScalarEvolution/flattened-0.ll index e6614ffd6467e..dc4cb06e4ca95 100644 --- a/llvm/test/Analysis/ScalarEvolution/flattened-0.ll +++ b/llvm/test/Analysis/ScalarEvolution/flattened-0.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s define void @foo([7 x i8]* %a) { ; CHECK-LABEL: @foo diff --git a/llvm/test/Analysis/ScalarEvolution/fold.ll b/llvm/test/Analysis/ScalarEvolution/fold.ll index 1006b9f81d6c5..d55651cef3c64 100644 
--- a/llvm/test/Analysis/ScalarEvolution/fold.ll +++ b/llvm/test/Analysis/ScalarEvolution/fold.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -S < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" -S < %s 2>&1 | FileCheck %s define i16 @test1(i8 %x) { %A = zext i8 %x to i12 diff --git a/llvm/test/Analysis/ScalarEvolution/how-far-to-zero.ll b/llvm/test/Analysis/ScalarEvolution/how-far-to-zero.ll index 07af88ffbebeb..7c9b25da322c8 100644 --- a/llvm/test/Analysis/ScalarEvolution/how-far-to-zero.ll +++ b/llvm/test/Analysis/ScalarEvolution/how-far-to-zero.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR13228 define void @f() nounwind uwtable readnone { diff --git a/llvm/test/Analysis/ScalarEvolution/huge_expression_limit.ll b/llvm/test/Analysis/ScalarEvolution/huge_expression_limit.ll index 5740915783189..e72c3b755032b 100644 --- a/llvm/test/Analysis/ScalarEvolution/huge_expression_limit.ll +++ b/llvm/test/Analysis/ScalarEvolution/huge_expression_limit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-huge-expr-threshold=1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-huge-expr-threshold=1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-huge-expr-threshold=1 2>&1 | FileCheck %s define void @test(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: 'test' diff --git a/llvm/test/Analysis/ScalarEvolution/implied-via-division.ll b/llvm/test/Analysis/ScalarEvolution/implied-via-division.ll index 43f4c04fa9275..3ff9dc276e1c8 100644 --- a/llvm/test/Analysis/ScalarEvolution/implied-via-division.ll 
+++ b/llvm/test/Analysis/ScalarEvolution/implied-via-division.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) diff --git a/llvm/test/Analysis/ScalarEvolution/incorrect-nsw.ll b/llvm/test/Analysis/ScalarEvolution/incorrect-nsw.ll index dd981c404ccde..9b0c3a6aba66d 100644 --- a/llvm/test/Analysis/ScalarEvolution/incorrect-nsw.ll +++ b/llvm/test/Analysis/ScalarEvolution/incorrect-nsw.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print,print" < %s 2>&1 | FileCheck %s define void @bad.nsw() { ; CHECK-LABEL: Classifying expressions for: @bad.nsw diff --git a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll index 249698d36ed5e..d4fda88bd0bd1 100644 --- a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll +++ b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @f0(i1 %c) { ; CHECK-LABEL: Classifying expressions for: @f0 diff --git a/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll b/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll index 318078ebf6ae1..8e698685ac193 100644 --- a/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll +++ b/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll @@ -1,4 +1,5 @@ -; ; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; ; RUN: opt -analyze 
-enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; ; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @infer.sext.0(i1* %c, i32 %start, i32* %buf) { ; CHECK-LABEL: Classifying expressions for: @infer.sext.0 diff --git a/llvm/test/Analysis/ScalarEvolution/inner-loop-by-latch-cond-unknown.ll b/llvm/test/Analysis/ScalarEvolution/inner-loop-by-latch-cond-unknown.ll index cf75d1fb8cf01..766d18e3d5fd6 100644 --- a/llvm/test/Analysis/ScalarEvolution/inner-loop-by-latch-cond-unknown.ll +++ b/llvm/test/Analysis/ScalarEvolution/inner-loop-by-latch-cond-unknown.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -iv-users -S | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -iv-users -S | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -S 2>&1 | FileCheck %s ; This is a regression test for the commit rL327362. diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll index 6057270f50944..8837b305e7ac0 100644 --- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll +++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll @@ -1,4 +1,5 @@ -; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-cast-depth=0 -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-cast-depth=0 -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-cast-depth=0 -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; Check that depth set to 0 prevents getAddExpr and getMulExpr from making ; transformations in SCEV. We expect the result to be very straightforward. 
diff --git a/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll b/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll index f26c8d56754d1..68793cc15ad25 100644 --- a/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll +++ b/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define i32 @slt_trip_count_with_range(i32 *%ptr0, i32 *%ptr1) { ; CHECK-LABEL: slt_trip_count_with_range diff --git a/llvm/test/Analysis/ScalarEvolution/load.ll b/llvm/test/Analysis/ScalarEvolution/load.ll index ea79476de6b31..6d9745bd76efb 100644 --- a/llvm/test/Analysis/ScalarEvolution/load.ll +++ b/llvm/test/Analysis/ScalarEvolution/load.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" target triple = "i386-pc-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/lshr-shl-differentconstmask.ll b/llvm/test/Analysis/ScalarEvolution/lshr-shl-differentconstmask.ll index c7fda08fc713b..c8285fbd4d229 100644 --- a/llvm/test/Analysis/ScalarEvolution/lshr-shl-differentconstmask.ll +++ b/llvm/test/Analysis/ScalarEvolution/lshr-shl-differentconstmask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; The obvious case. 
define i32 @udiv_biggerLshr(i32 %val) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/max-addops-inline.ll b/llvm/test/Analysis/ScalarEvolution/max-addops-inline.ll index 2701ed32839f2..7c8d44c89aef5 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-addops-inline.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-addops-inline.ll @@ -1,5 +1,7 @@ -; RUN: opt -analyze -scalar-evolution -scev-addops-inline-threshold=1 < %s | FileCheck --check-prefix=CHECK1 %s -; RUN: opt -analyze -scalar-evolution -scev-addops-inline-threshold=10 < %s | FileCheck --check-prefix=CHECK10 %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -scev-addops-inline-threshold=1 < %s | FileCheck --check-prefix=CHECK1 %s +; RUN: opt -disable-output "-passes=print" -scev-addops-inline-threshold=1 < %s 2>&1 | FileCheck --check-prefix=CHECK1 %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -scev-addops-inline-threshold=10 < %s | FileCheck --check-prefix=CHECK10 %s +; RUN: opt -disable-output "-passes=print" -scev-addops-inline-threshold=10 < %s 2>&1 | FileCheck --check-prefix=CHECK10 %s define i32 @foo(i64 %p0, i32 %p1) { ; CHECK1: %add2 = add nsw i32 %mul1, %add diff --git a/llvm/test/Analysis/ScalarEvolution/max-addrec-size.ll b/llvm/test/Analysis/ScalarEvolution/max-addrec-size.ll index aad0ddda37bcf..c37d68d4ee7f0 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-addrec-size.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-addrec-size.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution -scalar-evolution-max-add-rec-size=3 < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-add-rec-size=3 < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" -scalar-evolution-max-add-rec-size=3 < %s 2>&1 | FileCheck %s ; Show that we are able to avoid creation of huge SCEVs by capping the max ; AddRec size. 
diff --git a/llvm/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll b/llvm/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll index b593fc269a7b0..f05d4ff0f2f1d 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll b/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll index a0fa4a9d21625..8d401c3bb70fd 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; SCEV would take a long time to compute SCEV expressions for this IR. If SCEV ; finishes in < 1 second then the bug is fixed. 
diff --git a/llvm/test/Analysis/ScalarEvolution/max-mulops-inline.ll b/llvm/test/Analysis/ScalarEvolution/max-mulops-inline.ll index c0dc6e012c123..ec21ec3c31f06 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-mulops-inline.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-mulops-inline.ll @@ -1,5 +1,7 @@ -; RUN: opt -analyze -scalar-evolution -scev-mulops-inline-threshold=1 < %s | FileCheck --check-prefix=CHECK1 %s -; RUN: opt -analyze -scalar-evolution -scev-mulops-inline-threshold=10 < %s | FileCheck --check-prefix=CHECK10 %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -scev-mulops-inline-threshold=1 < %s | FileCheck --check-prefix=CHECK1 %s +; RUN: opt -disable-output "-passes=print" -scev-mulops-inline-threshold=1 < %s 2>&1 | FileCheck --check-prefix=CHECK1 %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -scev-mulops-inline-threshold=10 < %s | FileCheck --check-prefix=CHECK10 %s +; RUN: opt -disable-output "-passes=print" -scev-mulops-inline-threshold=10 < %s 2>&1 | FileCheck --check-prefix=CHECK10 %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll index 5260fe90e6b7d..aaae7ace00237 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; ScalarEvolution should be able to understand the loop and eliminate the casts. 
diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll index e3ba313a690b8..c266d1621ac2c 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; ScalarEvolution should be able to understand the loop and eliminate the casts. diff --git a/llvm/test/Analysis/ScalarEvolution/merge-add-rec-many-inputs.ll b/llvm/test/Analysis/ScalarEvolution/merge-add-rec-many-inputs.ll index 7a18dcda423e3..3a3ac55699419 100644 --- a/llvm/test/Analysis/ScalarEvolution/merge-add-rec-many-inputs.ll +++ b/llvm/test/Analysis/ScalarEvolution/merge-add-rec-many-inputs.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; Check that isImpliedViaMerge wouldn't crash when trying to prove ; SCEVUnknown and AddRec with phi having many inputs diff --git a/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll b/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll index 51f72c643cc6b..3cc515b957ca9 100644 --- a/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll @@ -1,4 +1,5 @@ -; RUN: opt -scalar-evolution -analyze < %s | FileCheck %s +; RUN: opt -scalar-evolution -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt "-passes=print" -disable-output < %s 2>&1 | FileCheck %s ; ; This checks if the min and max expressions are properly recognized by ; ScalarEvolution even though they the ICmpInst and SelectInst have different diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll 
b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index 8ce80ee14118a..5a7bb3c9e5cd5 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s !0 = !{i8 0, i8 127} diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll index bc01f22b3f308..8a7e0bc34576b 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-unknown-becount.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) 
declare void @llvm.assume(i1) diff --git a/llvm/test/Analysis/ScalarEvolution/non-IV-phi.ll b/llvm/test/Analysis/ScalarEvolution/non-IV-phi.ll index f0d6c2f5d9d33..9f18719eea3c6 100644 --- a/llvm/test/Analysis/ScalarEvolution/non-IV-phi.ll +++ b/llvm/test/Analysis/ScalarEvolution/non-IV-phi.ll @@ -1,4 +1,5 @@ -; RUN: opt -scalar-evolution -analyze < %s | FileCheck %s +; RUN: opt -scalar-evolution -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt "-passes=print" -disable-output < %s 2>&1 | FileCheck %s define void @test1(i8 %t, i32 %len) { ; CHECK-LABEL: test1 diff --git a/llvm/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll b/llvm/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll index fa5ab82e064c6..650eb8a6fa3ed 100644 --- a/llvm/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll +++ b/llvm/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @f(i1* %condition) { ; CHECK-LABEL: Classifying expressions for: @f diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll index 3ef31ff5581bd..b6867ca471f7a 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s ; ScalarEvolution should be able to fold away the sign-extensions ; on this loop with a primary induction variable incremented with diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll index a5c4b575f1365..0310ff341516b 100644 --- 
a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s ; ScalarEvolution should be able to fold away the sign-extensions ; on this loop with a primary induction variable incremented with diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll index cb48aa9d23fc3..39f199868eaa5 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; The addrecs in this loop are analyzable only by using nsw information. diff --git a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll index c58a034578e12..7d6e87eef805c 100644 --- a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone diff --git a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics.ll b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics.ll index fb3d816c9fab4..02ed4d88c808e 100644 --- a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/pointer-sign-bits.ll b/llvm/test/Analysis/ScalarEvolution/pointer-sign-bits.ll index b2cec2d9fc892..ed6b478962877 100644 --- a/llvm/test/Analysis/ScalarEvolution/pointer-sign-bits.ll +++ b/llvm/test/Analysis/ScalarEvolution/pointer-sign-bits.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution +; RUN: opt < %s -disable-output -scalar-evolution target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" %JavaObject = type { [0 x i32 (...)*]*, i8* } diff --git a/llvm/test/Analysis/ScalarEvolution/pr22179.ll b/llvm/test/Analysis/ScalarEvolution/pr22179.ll index 21ed055dc7b2c..8bea883cdc5b2 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr22179.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr22179.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s %struct.anon = type { i8 } %struct.S = type { i32 } diff --git a/llvm/test/Analysis/ScalarEvolution/pr22641.ll b/llvm/test/Analysis/ScalarEvolution/pr22641.ll index 3b55afe084545..6c824e47a4eb8 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr22641.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr22641.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output 
"-passes=print" < %s 2>&1 | FileCheck %s define i1 @main(i16 %a) { ; CHECK-LABEL: Classifying expressions for: @main diff --git a/llvm/test/Analysis/ScalarEvolution/pr24757.ll b/llvm/test/Analysis/ScalarEvolution/pr24757.ll index 83baade34ad3a..981661597d576 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr24757.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr24757.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; CHECK: Loop %bb1: backedge-taken count is ((2 * %a.promoted) /u 2) diff --git a/llvm/test/Analysis/ScalarEvolution/pr25369.ll b/llvm/test/Analysis/ScalarEvolution/pr25369.ll index 10754867a3683..6378d7443e316 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr25369.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr25369.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/pr27315.ll b/llvm/test/Analysis/ScalarEvolution/pr27315.ll index 8f5f79df563b9..697d90a1eeef3 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr27315.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr27315.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s declare i1 @use(i64) diff --git a/llvm/test/Analysis/ScalarEvolution/pr34538.ll b/llvm/test/Analysis/ScalarEvolution/pr34538.ll index abef58e496822..55fd76c9882c8 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr34538.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr34538.ll @@ -1,5 +1,7 @@ 
-; RUN: opt -scalar-evolution -loop-deletion -simplifycfg -analyze < %s | FileCheck %s --check-prefix=CHECK-ANALYSIS-1 -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s --check-prefix=CHECK-ANALYSIS-2 +; RUN: opt -scalar-evolution -loop-deletion -simplifycfg -analyze -enable-new-pm=0 < %s | FileCheck %s --check-prefix=CHECK-ANALYSIS-1 +; RUN: opt "-passes=print,loop(loop-deletion),simplifycfg" -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ANALYSIS-1 +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s --check-prefix=CHECK-ANALYSIS-2 +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ANALYSIS-2 define i32 @pr34538() local_unnamed_addr #0 { ; CHECK-ANALYSIS-1: Loop %do.body: backedge-taken count is 10000 diff --git a/llvm/test/Analysis/ScalarEvolution/pr40420.ll b/llvm/test/Analysis/ScalarEvolution/pr40420.ll index 0f57958c24c6d..a1654af5cae82 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr40420.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr40420.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 2>&1 | FileCheck %s ; REQUIRES: asserts define void @test(i8 %tmp6) { diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll index b07662ed95f5a..3a30733dc7524 100644 --- a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/ScalarEvolution/range-signedness.ll 
b/llvm/test/Analysis/ScalarEvolution/range-signedness.ll index d04fc9eb56baa..bfbbe3668cb37 100644 --- a/llvm/test/Analysis/ScalarEvolution/range-signedness.ll +++ b/llvm/test/Analysis/ScalarEvolution/range-signedness.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @x(i1* %cond) { ; CHECK-LABEL: Classifying expressions for: @x diff --git a/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll b/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll index 0a0b5fb5eb8c6..b49d473eb394a 100644 --- a/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll +++ b/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s ; copied from flags-from-poison.ll ; CHECK-LABEL: @test-add-nuw diff --git a/llvm/test/Analysis/ScalarEvolution/returned.ll b/llvm/test/Analysis/ScalarEvolution/returned.ll index 4c07cd1346657..e0079e8810b75 100644 --- a/llvm/test/Analysis/ScalarEvolution/returned.ll +++ b/llvm/test/Analysis/ScalarEvolution/returned.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" define i8* @foo(i32 %no, i8* nocapture %d) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll b/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll index 3adb2e0b1a068..34758e11c0a20 100644 --- 
a/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll +++ b/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll @@ -1,4 +1,5 @@ -; RUN: opt -scalar-evolution -analyze < %s | FileCheck %s +; RUN: opt -scalar-evolution -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt "-passes=print" -disable-output < %s 2>&1 | FileCheck %s ; CHECK: %1 = getelementptr , * null, i32 3 ; CHECK: --> (3 * sizeof()) U: [0,-15) S: [-9223372036854775808,9223372036854775793) diff --git a/llvm/test/Analysis/ScalarEvolution/scev-dispositions.ll b/llvm/test/Analysis/ScalarEvolution/scev-dispositions.ll index 4e382a93cda95..94074111f9f7b 100644 --- a/llvm/test/Analysis/ScalarEvolution/scev-dispositions.ll +++ b/llvm/test/Analysis/ScalarEvolution/scev-dispositions.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @single_loop(i32* %buf, i32 %start) { ; CHECK-LABEL: Classifying expressions for: @single_loop diff --git a/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll b/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll index 77f3482f03c06..89698b65f28a7 100644 --- a/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll +++ b/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; An example run where SCEV(%postinc)->getStart() may overflow: ; diff --git a/llvm/test/Analysis/ScalarEvolution/sdiv.ll b/llvm/test/Analysis/ScalarEvolution/sdiv.ll index 106cda1b7f0ff..89a3e77564ae5 100644 --- a/llvm/test/Analysis/ScalarEvolution/sdiv.ll +++ b/llvm/test/Analysis/ScalarEvolution/sdiv.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/sext-inreg.ll b/llvm/test/Analysis/ScalarEvolution/sext-inreg.ll index cc738a17bc471..ec473ab027145 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-inreg.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-inreg.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll index 5634078d2b0e4..3bbd1d238753d 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the ; trip count is within range where this is safe. 
diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll index 575b744a1a40b..a198bcb2e8fdb 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s ; CHECK: --> (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64) ; CHECK: --> (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64) diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll index 8749ff3987faa..b84c13938dfae 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: %tmp3 = sext i8 %tmp2 to i32 ; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: -1 diff --git a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll index 4a10749819712..b3a8dca7902f7 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: %tmp9 = shl i64 %tmp8, 33 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) diff --git a/llvm/test/Analysis/ScalarEvolution/sext-to-zext.ll b/llvm/test/Analysis/ScalarEvolution/sext-to-zext.ll index ca9c6de0d50d5..4cda4c7497e3b 100644 --- 
a/llvm/test/Analysis/ScalarEvolution/sext-to-zext.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-to-zext.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @f(i1 %c) { ; CHECK-LABEL: Classifying expressions for: @f diff --git a/llvm/test/Analysis/ScalarEvolution/sext-zero.ll b/llvm/test/Analysis/ScalarEvolution/sext-zero.ll index cac42638e9592..4c6abd052df48 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-zero.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-zero.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: %tmp9 = shl i64 %tmp8, 33 ; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) diff --git a/llvm/test/Analysis/ScalarEvolution/shift-op.ll b/llvm/test/Analysis/ScalarEvolution/shift-op.ll index ae13b2879df85..e3e63d5a9901d 100644 --- a/llvm/test/Analysis/ScalarEvolution/shift-op.ll +++ b/llvm/test/Analysis/ScalarEvolution/shift-op.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @test0(i32 %init) { ; CHECK-LABEL: Classifying expressions for: @test0 diff --git a/llvm/test/Analysis/ScalarEvolution/shl-lshr-differentconstmask.ll b/llvm/test/Analysis/ScalarEvolution/shl-lshr-differentconstmask.ll index 1886848b8be93..373dd7d666e58 100644 --- a/llvm/test/Analysis/ScalarEvolution/shl-lshr-differentconstmask.ll +++ b/llvm/test/Analysis/ScalarEvolution/shl-lshr-differentconstmask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; 
RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; The obvious case. define i32 @mul_biggerShl(i32 %val) nounwind { diff --git a/llvm/test/Analysis/ScalarEvolution/sle.ll b/llvm/test/Analysis/ScalarEvolution/sle.ll index f24c4807114fd..3e208c280b2e8 100644 --- a/llvm/test/Analysis/ScalarEvolution/sle.ll +++ b/llvm/test/Analysis/ScalarEvolution/sle.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; ScalarEvolution should be able to use nsw information to prove that ; this loop has a finite trip count. diff --git a/llvm/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll b/llvm/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll index dc24bd1b80478..62d9c7dc1d5c2 100644 --- a/llvm/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll +++ b/llvm/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define i32 @f0(i32 %x, i32 %y) { ; CHECK-LABEL: Classifying expressions for: @f0 diff --git a/llvm/test/Analysis/ScalarEvolution/smax.ll b/llvm/test/Analysis/ScalarEvolution/smax.ll index 122e9e47e56f4..2b2c81c8e90df 100644 --- a/llvm/test/Analysis/ScalarEvolution/smax.ll +++ b/llvm/test/Analysis/ScalarEvolution/smax.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR1614 ; CHECK: --> (%a smax %b) diff --git 
a/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll b/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll index 7ed0dadca252b..525b8df764b9f 100644 --- a/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll +++ b/llvm/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll b/llvm/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll index 0f18b8df69c6f..331bf31e9e78d 100644 --- a/llvm/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -S < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" -S < %s 2>&1 | FileCheck %s ; The exit value from this loop was originally calculated as 0. ; The actual exit condition is 256*256 == 0 (in i16). diff --git a/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll b/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll index 5d5e02efd0e4a..e23d0ab73c409 100644 --- a/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll +++ b/llvm/test/Analysis/ScalarEvolution/solve-quadratic.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution -S -debug-only=scalar-evolution,apint < %s 2>&1 | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -S -debug-only=scalar-evolution,apint < %s 2>&1 | FileCheck %s +; RUN: opt -disable-output "-passes=print" -S -debug-only=scalar-evolution,apint < %s 2>&1 2>&1 | FileCheck %s ; REQUIRES: asserts ; Use the following template to get a chrec {L,+,M,+,N}. 
diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll index 6debab34e3b31..197437b51ca12 100644 --- a/llvm/test/Analysis/ScalarEvolution/srem.ll +++ b/llvm/test/Analysis/ScalarEvolution/srem.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll b/llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll index b618b71a358de..353e3d40418fd 100644 --- a/llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll +++ b/llvm/test/Analysis/ScalarEvolution/strip-injective-zext.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; The initial SCEV for the backedge count is ; (zext i2 {(trunc i32 (1 + %a1) to i2),+,1}<%b2> to i32). 
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-andor.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-andor.ll index 7ffc423e08759..bafb75486a8c1 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-andor.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-andor.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-pow2.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-pow2.ll index 3a6f5fec2b8f7..d4c98de296697 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-pow2.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-pow2.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s define void @test1(i32 %n) { entry: diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-switch.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-switch.ll index 2d2b6b4994089..db335dc89aca2 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-switch.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-switch.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s declare void @foo() diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll 
b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll index 60370d63e036a..fa9c7832adbe3 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; ScalarEvolution should be able to compute trip count of the loop by proving ; that this is not an infinite loop with side effects. diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count.ll b/llvm/test/Analysis/ScalarEvolution/trip-count.ll index aef7f1b9bba52..dd6cf4eaae4fd 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count10.ll b/llvm/test/Analysis/ScalarEvolution/trip-count10.ll index 5540e3e6a2dac..459d293af9401 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count10.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count10.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze 
-enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s ; Trip counts with trivial exit conditions. diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count11.ll b/llvm/test/Analysis/ScalarEvolution/trip-count11.ll index 819a89efd5079..88728690d926f 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count11.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count11.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count12.ll b/llvm/test/Analysis/ScalarEvolution/trip-count12.ll index d0086ee2e6acc..66577e4cc4a26 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count12.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count12.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: Determining loop execution counts for: @test ; CHECK: Loop %for.body: backedge-taken count is ((-2 + %len) /u 2) diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count13.ll b/llvm/test/Analysis/ScalarEvolution/trip-count13.ll index 3e1009748f1cc..42d96ca9d42f4 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count13.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count13.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | 
FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @u_0(i8 %rhs) { ; E.g.: %rhs = 255, %start = 99, backedge taken 156 times diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count14.ll b/llvm/test/Analysis/ScalarEvolution/trip-count14.ll index 711939bc112e2..3f3f816369125 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count14.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count14.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @s32_max1(i32 %n, i32* %p) { entry: diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count15.ll b/llvm/test/Analysis/ScalarEvolution/trip-count15.ll index 3ad83776b1ce0..bfc88452a9cf2 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count15.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count15.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @umin_unsigned_check(i64 %n) { ; CHECK-LABEL: 'umin_unsigned_check' diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count2.ll b/llvm/test/Analysis/ScalarEvolution/trip-count2.ll index 7f45527238606..626cacd2b4db7 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count2.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count2.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s 
-disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s @A = weak global [1000 x i32] zeroinitializer, align 32 diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count3.ll b/llvm/test/Analysis/ScalarEvolution/trip-count3.ll index a50886be325f3..7941d4ab33822 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count3.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -scalar-evolution -analyze -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s ; ScalarEvolution can't compute a trip count because it doesn't know if ; dividing by the stride will have a remainder. This could theoretically diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count4.ll b/llvm/test/Analysis/ScalarEvolution/trip-count4.ll index 4c2d079e1922f..24978f62df729 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count4.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count4.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s ; ScalarEvolution should be able to compute a loop exit value for %indvar.i8. 
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count5.ll b/llvm/test/Analysis/ScalarEvolution/trip-count5.ll index f3ca343da6f15..3359a1f5d96e6 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count5.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count5.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; ScalarEvolution should be able to compute a maximum trip count ; value sufficient to fold away both sext casts. diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count6.ll b/llvm/test/Analysis/ScalarEvolution/trip-count6.ll index 103b097e09b15..925dfbb096957 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count6.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count6.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s @mode_table = global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1] diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count7.ll b/llvm/test/Analysis/ScalarEvolution/trip-count7.ll index 8b92bf71c041e..195a03ec30d61 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count7.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count7.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -disable-output 
"-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count8.ll b/llvm/test/Analysis/ScalarEvolution/trip-count8.ll index ac06fbf5db59c..a70e12c8f23f6 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count8.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count8.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution -scalar-evolution-classify-expressions=0 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s ; PR4599 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count9.ll b/llvm/test/Analysis/ScalarEvolution/trip-count9.ll index d0fb51a1ee5f3..664f9b1296b36 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count9.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count9.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -analyze -scalar-evolution -S -scalar-evolution-classify-expressions=0 < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -S -scalar-evolution-classify-expressions=0 < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" -S -scalar-evolution-classify-expressions=0 < %s 2>&1 | FileCheck %s ; Every combination of ; - starting at 0, 1, or %x diff --git a/llvm/test/Analysis/ScalarEvolution/tripmultiple_calculation.ll b/llvm/test/Analysis/ScalarEvolution/tripmultiple_calculation.ll index 
133532e31a5be..f57ab3dbdad36 100644 --- a/llvm/test/Analysis/ScalarEvolution/tripmultiple_calculation.ll +++ b/llvm/test/Analysis/ScalarEvolution/tripmultiple_calculation.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -analyze -scalar-evolution < %s 2>&1 | FileCheck %s +; RUN: opt -S -analyze -enable-new-pm=0 -scalar-evolution < %s 2>&1 | FileCheck %s +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; umin is represented using -1 * umax in scalar evolution. -1 is considered as the ; constant of the multiply expression (-1 * ((-1 + (-1 * %a)) umax (-1 + (-1 * %b)))). diff --git a/llvm/test/Analysis/ScalarEvolution/trunc-simplify.ll b/llvm/test/Analysis/ScalarEvolution/trunc-simplify.ll index cf37371939d5e..e4c752b02e0eb 100644 --- a/llvm/test/Analysis/ScalarEvolution/trunc-simplify.ll +++ b/llvm/test/Analysis/ScalarEvolution/trunc-simplify.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; Check that we convert ; trunc(C * a) -> trunc(C) * trunc(a) diff --git a/llvm/test/Analysis/ScalarEvolution/truncate.ll b/llvm/test/Analysis/ScalarEvolution/truncate.ll index 7ae6908fc2097..148bbe0746c4c 100644 --- a/llvm/test/Analysis/ScalarEvolution/truncate.ll +++ b/llvm/test/Analysis/ScalarEvolution/truncate.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution 2>&1 | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; RUN: opt < %s -passes='print' -S 2>&1 | FileCheck %s ; Regression test for assert ScalarEvolution::getTruncateExpr.
diff --git a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll index fb8f59fe42520..40cdb4921a42c 100644 --- a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll +++ b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s define void @umin_sext_x_zext_x(i32 %len) { ; CHECK-LABEL: 'umin_sext_x_zext_x' diff --git a/llvm/test/Analysis/ScalarEvolution/undefined.ll b/llvm/test/Analysis/ScalarEvolution/undefined.ll index b1f44460af6bd..693a2adf80ce1 100644 --- a/llvm/test/Analysis/ScalarEvolution/undefined.ll +++ b/llvm/test/Analysis/ScalarEvolution/undefined.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; ScalarEvolution shouldn't attempt to interpret expressions which have ; undefined results. 
diff --git a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll index e5335b1ae9137..cce9c218e24bc 100644 --- a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll +++ b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s define void @merge_values_with_ranges(i32 *%a_len_ptr, i32 *%b_len_ptr, i1 %unknown_cond) { diff --git a/llvm/test/Analysis/ScalarEvolution/unreachable-code.ll b/llvm/test/Analysis/ScalarEvolution/unreachable-code.ll index 69a7e39839a7e..90049e43fa0f5 100644 --- a/llvm/test/Analysis/ScalarEvolution/unreachable-code.ll +++ b/llvm/test/Analysis/ScalarEvolution/unreachable-code.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: %t = add i64 %t, 1 ; CHECK: --> undef diff --git a/llvm/test/Analysis/ScalarEvolution/unsimplified-loop.ll b/llvm/test/Analysis/ScalarEvolution/unsimplified-loop.ll index a3175077b6861..0ab46cd7d0c2b 100644 --- a/llvm/test/Analysis/ScalarEvolution/unsimplified-loop.ll +++ b/llvm/test/Analysis/ScalarEvolution/unsimplified-loop.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; This loop has no preheader, multiple backedges, etc., but ScalarEvolution ; should still be able to analyze it. 
diff --git a/llvm/test/Analysis/ScalarEvolution/urem-0.ll b/llvm/test/Analysis/ScalarEvolution/urem-0.ll index a53f75b86faa8..25998fa340f7f 100644 --- a/llvm/test/Analysis/ScalarEvolution/urem-0.ll +++ b/llvm/test/Analysis/ScalarEvolution/urem-0.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s +; RUN: opt < %s -scalar-evolution -analyze -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s "-passes=print" -disable-output 2>&1 | FileCheck %s define i8 @foo(i8 %a) { ; CHECK-LABEL: @foo diff --git a/llvm/test/Analysis/ScalarEvolution/widenable-condition.ll b/llvm/test/Analysis/ScalarEvolution/widenable-condition.ll index b7b5f71542826..52890936063cd 100644 --- a/llvm/test/Analysis/ScalarEvolution/widenable-condition.ll +++ b/llvm/test/Analysis/ScalarEvolution/widenable-condition.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; The semanics of this example are a bit subtle. The loop is required ; execute some number of times up to 1999. 
The compiler is free to reduce diff --git a/llvm/test/Analysis/ScalarEvolution/zext-divrem.ll b/llvm/test/Analysis/ScalarEvolution/zext-divrem.ll index 86037437f979e..3e5f3b0f5485e 100644 --- a/llvm/test/Analysis/ScalarEvolution/zext-divrem.ll +++ b/llvm/test/Analysis/ScalarEvolution/zext-divrem.ll @@ -1,4 +1,5 @@ -; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution -S < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" -S < %s 2>&1 | FileCheck %s define i64 @test1(i32 %a, i32 %b) { ; CHECK-LABEL: @test1 diff --git a/llvm/test/Analysis/ScalarEvolution/zext-mul.ll b/llvm/test/Analysis/ScalarEvolution/zext-mul.ll index 0c0f16c1deaa8..cf820f550741f 100644 --- a/llvm/test/Analysis/ScalarEvolution/zext-mul.ll +++ b/llvm/test/Analysis/ScalarEvolution/zext-mul.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; Check that we convert ; zext((a * b)) diff --git a/llvm/test/Analysis/ScalarEvolution/zext-wrap.ll b/llvm/test/Analysis/ScalarEvolution/zext-wrap.ll index 34462208fbb31..66bedcea7edf2 100644 --- a/llvm/test/Analysis/ScalarEvolution/zext-wrap.ll +++ b/llvm/test/Analysis/ScalarEvolution/zext-wrap.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; PR4569 define i16 @main() nounwind { diff --git a/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll b/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll new file mode 100644 index 0000000000000..d234205648c77 --- /dev/null +++ b/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll @@ -0,0 +1,112 @@ +; REQUIRES: asserts + +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=0,assume-queries-counter-count=1 -S | FileCheck %s --check-prefixes=SAME,COUNTER1 +; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=1,assume-queries-counter-count=2 -S | FileCheck %s --check-prefixes=SAME,COUNTER2 +; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=2,assume-queries-counter-count=1 -S | FileCheck %s --check-prefixes=SAME,COUNTER3 + +declare i1 @get_val() +declare void @llvm.assume(i1) + +define dso_local i1 @test1(i32* readonly %0) { +; COUNTER1-LABEL: @test1( +; COUNTER1-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ] +; COUNTER1-NEXT: ret i1 false +; +; COUNTER2-LABEL: @test1( +; COUNTER2-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ] +; COUNTER2-NEXT: [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null +; COUNTER2-NEXT: ret i1 [[TMP2]] +; +; COUNTER3-LABEL: @test1( +; COUNTER3-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ] +; COUNTER3-NEXT: [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null +; COUNTER3-NEXT: ret i1 [[TMP2]] +; + call void @llvm.assume(i1 true) ["nonnull"(i32* %0)] + %2 = icmp eq i32* %0, null + ret i1 %2 +} + +define dso_local i1 @test2(i32* readonly %0) { +; COUNTER1-LABEL: @test2( +; COUNTER1-NEXT: [[TMP2:%.*]] = icmp eq i32* [[TMP0:%.*]], null +; COUNTER1-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0]]) ] +; COUNTER1-NEXT: ret i1 [[TMP2]] +; +; COUNTER2-LABEL: @test2( +; COUNTER2-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ] +; COUNTER2-NEXT: ret i1 false +; +; COUNTER3-LABEL: @test2( +; COUNTER3-NEXT: [[TMP2:%.*]] = icmp eq i32* [[TMP0:%.*]], null +; COUNTER3-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0]]) ] +; COUNTER3-NEXT: ret i1 [[TMP2]] +; + %2 = icmp eq i32* %0, null + call void @llvm.assume(i1 true) ["nonnull"(i32* %0)] + ret i1 %2 +} + +define dso_local i32 
@test4(i32* readonly %0, i1 %cond) { +; COUNTER1-LABEL: @test4( +; COUNTER1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ] +; COUNTER1-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; COUNTER1: B: +; COUNTER1-NEXT: br label [[A]] +; COUNTER1: A: +; COUNTER1-NEXT: [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null +; COUNTER1-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]] +; COUNTER1: 3: +; COUNTER1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 +; COUNTER1-NEXT: br label [[TMP5]] +; COUNTER1: 5: +; COUNTER1-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP4]], [[TMP3]] ], [ 0, [[A]] ] +; COUNTER1-NEXT: ret i32 [[TMP6]] +; +; COUNTER2-LABEL: @test4( +; COUNTER2-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ] +; COUNTER2-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; COUNTER2: B: +; COUNTER2-NEXT: br label [[A]] +; COUNTER2: A: +; COUNTER2-NEXT: br i1 false, label [[TMP4:%.*]], label [[TMP2:%.*]] +; COUNTER2: 2: +; COUNTER2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 +; COUNTER2-NEXT: br label [[TMP4]] +; COUNTER2: 4: +; COUNTER2-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[TMP2]] ], [ 0, [[A]] ] +; COUNTER2-NEXT: ret i32 [[TMP5]] +; +; COUNTER3-LABEL: @test4( +; COUNTER3-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ] +; COUNTER3-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; COUNTER3: B: +; COUNTER3-NEXT: br label [[A]] +; COUNTER3: A: +; COUNTER3-NEXT: br i1 false, label [[TMP4:%.*]], label [[TMP2:%.*]] +; COUNTER3: 2: +; COUNTER3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 +; COUNTER3-NEXT: br label [[TMP4]] +; COUNTER3: 4: +; COUNTER3-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[TMP2]] ], [ 0, [[A]] ] +; COUNTER3-NEXT: ret i32 [[TMP5]] +; + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %0, i32 4)] + br i1 %cond, label %A, label %B + +B: + br label %A + +A: + %2 = icmp eq i32* %0, 
null + br i1 %2, label %5, label %3 + +3: ; preds = %1 + %4 = load i32, i32* %0, align 4 + br label %5 + +5: ; preds = %1, %3 + %6 = phi i32 [ %4, %3 ], [ 0, %A ] + ret i32 %6 +} diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll index 2a67073713c0b..98feab6fe2f99 100644 --- a/llvm/test/Bitcode/summary_version.ll +++ b/llvm/test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index c6573ebc3e3e9..b083184a05b4a 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -16,6 +16,7 @@ llvm_canonicalize_cmake_booleans( LLVM_ENABLE_PLUGINS LLVM_BYE_LINK_INTO_TOOLS LLVM_HAVE_TF_AOT + LLVM_HAVE_TF_API ) configure_lit_site_cfg( @@ -84,6 +85,7 @@ set(LLVM_TEST_DEPENDS llvm-install-name-tool llvm-jitlink llvm-lib + llvm-libtool-darwin llvm-link llvm-lipo llvm-locstats diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir new file mode 100644 index 0000000000000..483547ac0511c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: test_combine_sext_trunc_of_sextload +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_sext_trunc_of_sextload + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXTLOAD]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(p0) = COPY $x0 + 
%1:_(s64) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = G_TRUNC %1:_(s64) + %3:_(s32) = G_SEXT_INREG %2:_(s32), 16 + $w0 = COPY %3(s32) +... +--- +name: test_combine_sext_of_sextload +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_sext_of_sextload + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = COPY %1:_(s32) + %3:_(s32) = G_SEXT_INREG %2:_(s32), 16 + $w0 = COPY %3(s32) +... +--- +name: test_combine_sext_of_sextload_not_matching +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; Here we're trying to extend from a larger width than was extended in the load. + ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_matching + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = G_SEXT_INREG %1:_(s32), 24 + $w0 = COPY %2(s32) +... +--- +name: test_combine_sext_of_sextload_not_enough_src_signbits +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; Here we're trying to extend from a smaller width than was extended in the load. + ; Don't perform the combine. 
+ ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_enough_src_signbits + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8 + ; CHECK: $w0 = COPY [[SEXT_INREG]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = G_SEXT_INREG %1:_(s32), 8 + $w0 = COPY %2(s32) +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll index f1be1011fa865..f8b23ef84721e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -211,3 +211,35 @@ define i32 @test_memory_constraint(i32* %a) nounwind { %1 = tail call i32 asm "ldr $0, $1", "=r,*m"(i32* %a) ret i32 %1 } + +define i16 @test_anyext_input() { + ; CHECK-LABEL: name: test_anyext_input + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 655370 /* regdef:GPR32common */, def %0, 9 /* reguse */, [[COPY]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; CHECK: $w0 = COPY [[ANYEXT1]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1 = call i16 asm sideeffect "", "=r,r"(i16 1) + ret i16 %1 +} + +define i16 @test_anyext_input_with_matching_constraint() { + ; CHECK-LABEL: name: test_anyext_input_with_matching_constraint + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) + ; CHECK: 
INLINEASM &"", 1 /* sideeffect attdialect */, 655370 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; CHECK: $w0 = COPY [[ANYEXT1]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1 = call i16 asm sideeffect "", "=r,0"(i16 1) + ret i16 %1 +} diff --git a/llvm/test/CodeGen/AArch64/README b/llvm/test/CodeGen/AArch64/README new file mode 100644 index 0000000000000..b0a93e8668eeb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/README @@ -0,0 +1,11 @@ +++ SVE CodeGen Warnings ++ + +When the WARN check lines fail in the SVE codegen tests it most likely means you +have introduced a warning due to: +1. Adding an invalid call to VectorType::getNumElements() or EVT::getVectorNumElements() + when the type is a scalable vector. +2. Relying upon an implicit cast conversion from TypeSize to uint64_t. + +For generic code, please modify your code to work with ElementCount and TypeSize directly. +For target-specific code that only deals with fixed-width vectors, use the fixed-size interfaces. +Please refer to the code where those functions live for more details. 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll index 5966fc65b0c00..58b025afd9370 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll @@ -38,3 +38,38 @@ define fp128 @baz() optsize { ; CHECK-NEXT: ret ret fp128 0xL00000000000000000000000000000000 } + +; CHECK: literal8 +; CHECK: .quad 0x0000001fffffffd +define double @foo2_pgso() !prof !14 { +; CHECK: _foo2_pgso: +; CHECK: adrp x[[REG:[0-9]+]], lCPI4_0@PAGE +; CHECK: ldr d0, [x[[REG]], lCPI4_0@PAGEOFF] +; CHECK-NEXT: ret + ret double 0x1FFFFFFFd1 +} + +define float @bar_pgso() !prof !14 { +; CHECK: _bar_pgso: +; CHECK: adrp x[[REG:[0-9]+]], lCPI5_0@PAGE +; CHECK: ldr s0, [x[[REG]], lCPI5_0@PAGEOFF] +; CHECK-NEXT: ret + ret float 0x400921FB80000000 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index 82fec748928a8..3a7c06c37e01f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -27,10 +27,10 @@ entry: ; NONE16: fmov s1, wzr ; NONE16: fmov d2, xzr ; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0 -; ZEROFP: ldr h0,{{.*}} -; ZEROFP: movi v{{[0-3]+}}.2d, #0 -; ZEROFP: movi v{{[0-3]+}}.2d, #0 -; ZEROFP: movi v{{[0-3]+}}.2d, #0 +; ZEROFP-DAG: ldr h0,{{.*}} +; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0 +; ZEROFP-DAG: 
movi v{{[0-3]+}}.2d, #0 +; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0 ; ZERO16: movi v{{[0-3]+}}.2d, #0 ; ZERO16: movi v{{[0-3]+}}.2d, #0 ; ZERO16: movi v{{[0-3]+}}.2d, #0 diff --git a/llvm/test/CodeGen/AArch64/cmp-bool.ll b/llvm/test/CodeGen/AArch64/cmp-bool.ll new file mode 100644 index 0000000000000..907d982a7efd1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmp-bool.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +define void @bool_eq(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; CHECK-LABEL: bool_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.eq .LBB0_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %if.then +; CHECK-NEXT: br x2 +entry: + %0 = xor i1 %a, %b + br i1 %0, label %if.end, label %if.then + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} + +define void @bool_ne(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; CHECK-LABEL: bool_ne: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.eq .LBB1_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: br x2 +; CHECK-NEXT: .LBB1_2: // %if.end +; CHECK-NEXT: ret +entry: + %cmp = xor i1 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll index 0e4f2c02c3110..2ff4858307802 100644 --- a/llvm/test/CodeGen/AArch64/fadd-combines.ll +++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll @@ -207,6 +207,10 @@ define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, doubl ret double %a2 } +; Minimum FMF - the 1st fadd is contracted because that combines +; fmul+fadd as specified by the order of operations; the 2nd fadd +; requires reassociation to fuse with c*d. 
+ define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind { ; CHECK-LABEL: fadd_fma_fmul_fmf: ; CHECK: // %bb.0: @@ -220,13 +224,14 @@ define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n ret float %a2 } -; Minimum FMF, commute final add operands, change type. +; Not minimum FMF. define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind { ; CHECK-LABEL: fadd_fma_fmul_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmadd s2, s2, s3, s4 +; CHECK-NEXT: fmul s2, s2, s3 ; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: fadd s0, s4, s0 ; CHECK-NEXT: ret %m1 = fmul float %a, %b %m2 = fmul float %c, %d diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 75013bc475a13..046357b860b3b 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -30,6 +30,7 @@ define void @test_address_sve_fp() nounwind { entry: unreachable } define void @test_stack_arg_sve() nounwind { entry: unreachable } define void @test_address_sve_out_of_range() nounwind { entry: unreachable } + define void @test_address_gpr_vla_nobp() nounwind { entry: unreachable } define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable } define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable } define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable } @@ -334,6 +335,39 @@ body: | RET_ReallyLR --- ... +# Test that non-SVE objects are accessed from FP when there is no BP, +# but the SP cannot be used because of variable-length arrays. 
+# +# +----------+ <- FP +# | %fstack.0| // 16 scalable bytes +# +----------+ <- @FP - 16 scalable bytes +# | %stack.0 | // 16 bytes +# +----------+ <- @FP - 16 scalable bytes - 16b +# : %stack.1 : // variable length +# +----------+ <- SP + +# CHECK-LABEL: name: test_address_gpr_vla_nobp +# CHECK: bb.0.entry: +# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $fp, -1 +# CHECK-NEXT: STURXi $xzr, killed $[[TMP]], -16 +# CHECK: RET_ReallyLR +name: test_address_gpr_vla_nobp +frameInfo: + maxAlignment: 16 +fixedStack: + - { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 } +stack: + - { id: 0, stack-id: default, size: 16, alignment: 8 } + - { id: 1, stack-id: default, type: variable-sized } +body: | + bb.0.entry: + liveins: $xzr + + STRXui $xzr, %stack.0, 0 + + RET_ReallyLR +--- +... # CHECK-LABEL: name: save_restore_pregs_sve # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: frame-setup STR_PXI killed $p6, $sp, 5 diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll index 77b7012d2ed1e..6850846fec068 100644 --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -95,6 +95,8 @@ exit: ret void } +; TODO: rev16? + define void @rotate16_in_place(i8* %p) { ; A53-LABEL: rotate16_in_place: ; A53: // %bb.0: @@ -112,6 +114,8 @@ define void @rotate16_in_place(i8* %p) { ret void } +; TODO: rev16? 
+ define void @rotate16(i8* %p, i8* %q) { ; A53-LABEL: rotate16: ; A53: // %bb.0: @@ -134,10 +138,9 @@ define void @rotate16(i8* %p, i8* %q) { define void @rotate32_in_place(i16* %p) { ; A53-LABEL: rotate32_in_place: ; A53: // %bb.0: -; A53-NEXT: ldrh w8, [x0, #2] -; A53-NEXT: ldrh w9, [x0] -; A53-NEXT: strh w8, [x0] -; A53-NEXT: strh w9, [x0, #2] +; A53-NEXT: ldr w8, [x0] +; A53-NEXT: ror w8, w8, #16 +; A53-NEXT: str w8, [x0] ; A53-NEXT: ret %p0 = getelementptr i16, i16* %p, i64 0 %p1 = getelementptr i16, i16* %p, i64 1 @@ -151,10 +154,9 @@ define void @rotate32_in_place(i16* %p) { define void @rotate32(i16* %p) { ; A53-LABEL: rotate32: ; A53: // %bb.0: -; A53-NEXT: ldrh w8, [x0, #2] -; A53-NEXT: ldrh w9, [x0] -; A53-NEXT: strh w8, [x0, #84] -; A53-NEXT: strh w9, [x0, #86] +; A53-NEXT: ldr w8, [x0] +; A53-NEXT: ror w8, w8, #16 +; A53-NEXT: str w8, [x0, #84] ; A53-NEXT: ret %p0 = getelementptr i16, i16* %p, i64 0 %p1 = getelementptr i16, i16* %p, i64 1 @@ -167,6 +169,8 @@ define void @rotate32(i16* %p) { ret void } +; Prefer paired memops over rotate. + define void @rotate64_in_place(i32* %p) { ; A53-LABEL: rotate64_in_place: ; A53: // %bb.0: @@ -182,6 +186,8 @@ define void @rotate64_in_place(i32* %p) { ret void } +; Prefer paired memops over rotate. 
+ define void @rotate64(i32* %p) { ; A53-LABEL: rotate64: ; A53: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll b/llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll new file mode 100644 index 0000000000000..82cb0a35f2a58 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll @@ -0,0 +1,87 @@ +; RUN: llc -O3 %s -o %t.s +; RUN: llc -O3 -stop-after=atomic-expand %s -o %t.mir +; RUN: llc -O3 -start-after=atomic-expand %s -o %t2.s + +; If we add tti pass correctly files should be identical +; Otherwise LSR will use default TargetTransformInfo and +; optimize the loop differently +; RUN: cmp %t.s %t2.s + +; ModuleID = 'loop.c' +source_filename = "loop.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-none-linux-gnu" + +@q = dso_local local_unnamed_addr global i32* null, align 8 + +; Function Attrs: nofree norecurse nounwind +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %cmp5 = icmp sgt i32 %argc, 0 + br i1 %cmp5, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %0 = load i32*, i32** @q, align 8, !tbaa !2 + %1 = zext i32 %argc to i64 + %2 = add nsw i64 %1, -1 + %3 = lshr i64 %2, 5 + %4 = add nuw nsw i64 %3, 1 + %min.iters.check = icmp eq i64 %3, 0 + br i1 %min.iters.check, label %for.body.preheader, label %vector.ph + +for.body.preheader: ; preds = %middle.block, %for.body.lr.ph + %indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %ind.end, %middle.block ] + br label %for.body + +vector.ph: ; preds = %for.body.lr.ph + %n.vec = and i64 %4, 1152921504606846974 + %ind.end = shl i64 %n.vec, 5 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %offset.idx = shl i64 %index, 5 + %induction7 = or i64 %offset.idx, 32 + %5 = getelementptr inbounds i32, i32* %0, i64 %offset.idx + %6 = 
getelementptr inbounds i32, i32* %0, i64 %induction7 + %7 = trunc i64 %offset.idx to i32 + %8 = trunc i64 %induction7 to i32 + store i32 %7, i32* %5, align 4, !tbaa !6 + store i32 %8, i32* %6, align 4, !tbaa !6 + %index.next = add i64 %index, 2 + %9 = icmp eq i64 %index.next, %n.vec + br i1 %9, label %middle.block, label %vector.body, !llvm.loop !8 + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %4, %n.vec + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret i32 0 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %0, i64 %indvars.iv + %10 = trunc i64 %indvars.iv to i32 + store i32 %10, i32* %arrayidx, align 4, !tbaa !6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32 + %cmp = icmp ult i64 %indvars.iv.next, %1 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !10 +} + +attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git d9943e7f0ce888733ee7ba91da432e5f01f7aa85)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"int", !4, i64 0} +!8 = distinct !{!8, !9} +!9 = !{!"llvm.loop.isvectorized", i32 1} +!10 = distinct !{!10, 
!9} diff --git a/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll b/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll index f5ea33d273ecb..4e9ac5a50eb93 100644 --- a/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sve -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECKISEL ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; CHECKCG-LABEL: foo: diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll index 670c9864c85ab..bb96e996df024 100644 --- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -3,6 +3,7 @@ ; RUN: not --crash llc -mtriple=aarch64_be -mattr=+sve < %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @bitcast_i16_to_i8( %v) { diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll index 92395a6502746..ad97b9bb54798 100644 --- a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll +++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; Test that scalable vectors that are a multiple of the legal vector size diff --git a/llvm/test/CodeGen/AArch64/sve-callbyref-notailcall.ll b/llvm/test/CodeGen/AArch64/sve-callbyref-notailcall.ll index caa8d32186f4c..ff339f5e92885 100644 --- a/llvm/test/CodeGen/AArch64/sve-callbyref-notailcall.ll +++ b/llvm/test/CodeGen/AArch64/sve-callbyref-notailcall.ll @@ -3,6 +3,7 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; CHECK-LABEL: caller: diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll index d579ba08b59b6..bceb39af0beec 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Test that z8 and z9, passed in by reference, are correctly loaded from x0 and x1. diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll index 6700c27eb1096..e535afdd07786 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention.ll index 1906abe6645e3..767a3cd8acfe9 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; CHECK-LABEL: name: nosve_signature diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll index e63ae4db7648c..cd40f66a16c9a 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define i8 @test_lane0_16xi8( %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll index 29ad1273e352b..40c147c31ff84 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; Test that DAGCombiner doesn't drop the scalable flag when it tries to fold: diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll index 703e86d9f4539..86fff734f1883 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @oeq( %x, %x2) { diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll new file mode 100644 index 0000000000000..4ffb56abe5f18 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll @@ -0,0 +1,168 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | 
FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: z{0-9} + +; NOTE: fptrunc operations bigger than NEON are expanded. These tests just +; ensure we've correctly set the operation action for fixed length vector types +; that require SVE. They'll be updated to protect their expected code generation +; when lowering it implemented. 
+ +; +; fptrunc f32 -> f16 +; + +define <8 x half> @fptrunc_v8f32_v8f16(<8 x float>* %in) #0 { +; CHECK-LABEL: fptrunc_v8f32_v8f16: +; CHECK-COUNT-8: fcvt h{{[0-9]}}, s{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <8 x float>, <8 x float>* %in + %b = fptrunc <8 x float> %a to <8 x half> + ret <8 x half> %b +} + +define void @fptrunc_v16f32_v16f16(<16 x float>* %in, <16 x half>* %out) #0 { +; CHECK-LABEL: fptrunc_v16f32_v16f16: +; CHECK-COUNT-16: fcvt h{{[0-9]}}, s{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <16 x float>, <16 x float>* %in + %b = fptrunc <16 x float> %a to <16 x half> + store <16 x half> %b, <16 x half>* %out + ret void +} + +define void @fptrunc_v32f32_v32f16(<32 x float>* %in, <32 x half>* %out) #0 { +; CHECK-LABEL: fptrunc_v32f32_v32f16: +; CHECK-COUNT-32: fcvt h{{[0-9]}}, s{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <32 x float>, <32 x float>* %in + %b = fptrunc <32 x float> %a to <32 x half> + store <32 x half> %b, <32 x half>* %out + ret void +} + +define void @fptrunc_v64f32_v64f16(<64 x float>* %in, <64 x half>* %out) #0 { +; CHECK-LABEL: fptrunc_v64f32_v64f16: +; CHECK-COUNT-64: fcvt h{{[0-9]}}, s{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <64 x float>, <64 x float>* %in + %b = fptrunc <64 x float> %a to <64 x half> + store <64 x half> %b, <64 x half>* %out + ret void +} + +; +; fptrunc f64 -> f16 +; + +define <4 x half> @fptrunc_v4f64_v4f16(<4 x double>* %in) #0 { +; CHECK-LABEL: fptrunc_v4f64_v4f16: +; CHECK-COUNT-4: fcvt h{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <4 x double>, <4 x double>* %in + %b = fptrunc <4 x double> %a to <4 x half> + ret <4 x half> %b +} + +define <8 x half> @fptrunc_v8f64_v8f16(<8 x double>* %in) #0 { +; CHECK-LABEL: fptrunc_v8f64_v8f16: +; CHECK-COUNT-8: fcvt h{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <8 x double>, <8 x double>* %in + %b = fptrunc <8 x double> %a to <8 x half> + ret <8 x half> %b +} + +define void 
@fptrunc_v16f64_v16f16(<16 x double>* %in, <16 x half>* %out) #0 { +; CHECK-LABEL: fptrunc_v16f64_v16f16: +; CHECK-COUNT-16: fcvt h{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <16 x double>, <16 x double>* %in + %b = fptrunc <16 x double> %a to <16 x half> + store <16 x half> %b, <16 x half>* %out + ret void +} + +define void @fptrunc_v32f64_v32f16(<32 x double>* %in, <32 x half>* %out) #0 { +; CHECK-LABEL: fptrunc_v32f64_v32f16: +; CHECK-COUNT-32: fcvt h{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <32 x double>, <32 x double>* %in + %b = fptrunc <32 x double> %a to <32 x half> + store <32 x half> %b, <32 x half>* %out + ret void +} + +; +; fptrunc f64 -> f32 +; + +define <4 x float> @fptrunc_v4f64_v4f32(<4 x double>* %in) #0 { +; CHECK-LABEL: fptrunc_v4f64_v4f32: +; CHECK-COUNT-4: fcvt s{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <4 x double>, <4 x double>* %in + %b = fptrunc <4 x double> %a to <4 x float> + ret <4 x float> %b +} + +define void @fptrunc_v8f64_v8f32(<8 x double>* %in, <8 x float>* %out) #0 { +; CHECK-LABEL: fptrunc_v8f64_v8f32: +; CHECK-COUNT-8: fcvt s{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <8 x double>, <8 x double>* %in + %b = fptrunc <8 x double> %a to <8 x float> + store <8 x float> %b, <8 x float>* %out + ret void +} + +define void @fptrunc_v16f64_v16f32(<16 x double>* %in, <16 x float>* %out) #0 { +; CHECK-LABEL: fptrunc_v16f64_v16f32: +; CHECK-COUNT-16: fcvt s{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <16 x double>, <16 x double>* %in + %b = fptrunc <16 x double> %a to <16 x float> + store <16 x float> %b, <16 x float>* %out + ret void +} + +define void @fptrunc_v32f64_v32f32(<32 x double>* %in, <32 x float>* %out) #0 { +; CHECK-LABEL: fptrunc_v32f64_v32f32: +; CHECK-COUNT-32: fcvt s{{[0-9]}}, d{{[0-9]}} +; CHECK-NOT: fcvt +; CHECK: ret + %a = load <32 x double>, <32 x double>* %in + %b = fptrunc <32 x double> %a to <32 x 
float> + store <32 x float> %b, <32 x float>* %out + ret void +} + +attributes #0 = { nounwind "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll index 45ebdc78784e4..9fe2b86402f15 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll @@ -13,7 +13,10 @@ ; RUN: llc -aarch64-sve-vector-bits-min=1664 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 -; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s 2>%t | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; WARN-NOT: warning ; Test we can code generater patterns of the form: ; fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0 @@ -85,4 +88,19 @@ bb1: ret void } +; +define <8 x i1> @no_warn_dropped_scalable(<8 x i32>* %in) #0 { +; CHECK-LABEL: no_warn_dropped_scalable: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0] +; CHECK-COUNT-8: cmp w{{[0-9]+}}, #0 +; CHECK: ret + %a = load <8 x i32>, <8 x i32>* %in + br label %bb1 + +bb1: + %cond = icmp sgt <8 x i32> %a, zeroinitializer + ret <8 x i1> %cond +} + attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 
e3c0ba72bda1f..891a5c144234d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -2,10 +2,11 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning -define @fadd_h( %a, %b) { -; CHECK-LABEL: fadd_h: +define @fadd_nxv8f16( %a, %b) { +; CHECK-LABEL: fadd_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fadd z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -13,8 +14,28 @@ define @fadd_h( %a, % ret %res } -define @fadd_s( %a, %b) { -; CHECK-LABEL: fadd_s: +define @fadd_nxv4f16( %a, %b) { +; CHECK-LABEL: fadd_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fadd %a, %b + ret %res +} + +define @fadd_nxv2f16( %a, %b) { +; CHECK-LABEL: fadd_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fadd %a, %b + ret %res +} + +define @fadd_nxv4f32( %a, %b) { +; CHECK-LABEL: fadd_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fadd z0.s, z0.s, z1.s ; CHECK-NEXT: ret @@ -22,8 +43,18 @@ define @fadd_s( %a, %res } -define @fadd_d( %a, %b) { -; CHECK-LABEL: fadd_d: +define @fadd_nxv2f32( %a, %b) { +; CHECK-LABEL: fadd_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = fadd %a, %b + ret %res +} + +define @fadd_nxv2f64( %a, %b) { +; CHECK-LABEL: fadd_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fadd z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -31,8 +62,68 @@ define @fadd_d( %a, %res } -define @fsub_h( %a, %b) { -; CHECK-LABEL: fsub_h: +define @fdiv_nxv8f16( %a, %b) { +; CHECK-LABEL: fdiv_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fdiv %a, %b + ret %res +} + +define 
@fdiv_nxv4f16( %a, %b) { +; CHECK-LABEL: fdiv_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fdiv %a, %b + ret %res +} + +define @fdiv_nxv2f16( %a, %b) { +; CHECK-LABEL: fdiv_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fdiv %a, %b + ret %res +} + +define @fdiv_nxv4f32( %a, %b) { +; CHECK-LABEL: fdiv_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = fdiv %a, %b + ret %res +} + +define @fdiv_nxv2f32( %a, %b) { +; CHECK-LABEL: fdiv_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = fdiv %a, %b + ret %res +} + +define @fdiv_nxv2f64( %a, %b) { +; CHECK-LABEL: fdiv_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = fdiv %a, %b + ret %res +} + +define @fsub_nxv8f16( %a, %b) { +; CHECK-LABEL: fsub_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fsub z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -40,8 +131,28 @@ define @fsub_h( %a, % ret %res } -define @fsub_s( %a, %b) { -; CHECK-LABEL: fsub_s: +define @fsub_nxv4f16( %a, %b) { +; CHECK-LABEL: fsub_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fsub %a, %b + ret %res +} + +define @fsub_nxv2f16( %a, %b) { +; CHECK-LABEL: fsub_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fsub %a, %b + ret %res +} + +define @fsub_nxv4f32( %a, %b) { +; CHECK-LABEL: fsub_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fsub z0.s, z0.s, z1.s ; CHECK-NEXT: ret @@ -49,8 +160,18 @@ define @fsub_s( %a, %res } -define @fsub_d( %a, %b) { -; CHECK-LABEL: fsub_d: +define @fsub_nxv2f32( %a, %b) { +; CHECK-LABEL: fsub_nxv2f32: +; CHECK: // 
%bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = fsub %a, %b + ret %res +} + +define @fsub_nxv2f64( %a, %b) { +; CHECK-LABEL: fsub_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fsub z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -58,8 +179,8 @@ define @fsub_d( %a, %res } -define @fmul_h( %a, %b) { -; CHECK-LABEL: fmul_h: +define @fmul_nxv8f16( %a, %b) { +; CHECK-LABEL: fmul_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmul z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -67,8 +188,28 @@ define @fmul_h( %a, % ret %res } -define @fmul_s( %a, %b) { -; CHECK-LABEL: fmul_s: +define @fmul_nxv4f16( %a, %b) { +; CHECK-LABEL: fmul_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fmul %a, %b + ret %res +} + +define @fmul_nxv2f16( %a, %b) { +; CHECK-LABEL: fmul_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %res = fmul %a, %b + ret %res +} + +define @fmul_nxv4f32( %a, %b) { +; CHECK-LABEL: fmul_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmul z0.s, z0.s, z1.s ; CHECK-NEXT: ret @@ -76,8 +217,18 @@ define @fmul_s( %a, %res } -define @fmul_d( %a, %b) { -; CHECK-LABEL: fmul_d: +define @fmul_nxv2f32( %a, %b) { +; CHECK-LABEL: fmul_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %res = fmul %a, %b + ret %res +} + +define @fmul_nxv2f64( %a, %b) { +; CHECK-LABEL: fmul_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fmul z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -85,8 +236,8 @@ define @fmul_d( %a, %res } -define @fma_half( %a, %b, %c) { -; CHECK-LABEL: fma_half: +define @fma_nxv8f16( %a, %b, %c) { +; CHECK-LABEL: fma_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h @@ -95,8 +246,31 @@ define @fma_half( %a, %r = call @llvm.fma.nxv8f16( %a, %b, %c) ret %r } -define @fma_float( %a, %b, %c) { -; CHECK-LABEL: 
fma_float: + +define @fma_nxv4f16( %a, %b, %c) { +; CHECK-LABEL: fma_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %r = call @llvm.fma.nxv4f16( %a, %b, %c) + ret %r +} + +define @fma_nxv2f16( %a, %b, %c) { +; CHECK-LABEL: fma_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %r = call @llvm.fma.nxv2f16( %a, %b, %c) + ret %r +} + +define @fma_nxv4f32( %a, %b, %c) { +; CHECK-LABEL: fma_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s @@ -105,8 +279,20 @@ define @fma_float( %a, @llvm.fma.nxv4f32( %a, %b, %c) ret %r } -define @fma_double_1( %a, %b, %c) { -; CHECK-LABEL: fma_double_1: + +define @fma_nxv2f32( %a, %b, %c) { +; CHECK-LABEL: fma_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %r = call @llvm.fma.nxv2f32( %a, %b, %c) + ret %r +} + +define @fma_nxv2f64_1( %a, %b, %c) { +; CHECK-LABEL: fma_nxv2f64_1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fmla z2.d, p0/m, z0.d, z1.d @@ -115,8 +301,9 @@ define @fma_double_1( %a, @llvm.fma.nxv2f64( %a, %b, %c) ret %r } -define @fma_double_2( %a, %b, %c) { -; CHECK-LABEL: fma_double_2: + +define @fma_nxv2f64_2( %a, %b, %c) { +; CHECK-LABEL: fma_nxv2f64_2: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fmla z2.d, p0/m, z1.d, z0.d @@ -125,8 +312,9 @@ define @fma_double_2( %a, @llvm.fma.nxv2f64( %b, %a, %c) ret %r } -define @fma_double_3( %a, %b, %c) { -; CHECK-LABEL: fma_double_3: + +define @fma_nxv2f64_3( %a, %b, %c) { +; CHECK-LABEL: fma_nxv2f64_3: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fmla z0.d, p0/m, z2.d, z1.d @@ -208,6 +396,18 @@ define void @scalar_to_vector(%complex* %outval, %pred, * %P1, * %P2) { +; CHECK-LABEL: float_copy: +; 
CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: st1w { z0.s }, p0, [x1] +; CHECK-NEXT: ret + %A = load , * %P1, align 16 + store %A, * %P2, align 16 + ret void +} + declare @llvm.aarch64.sve.frecps.x.nxv8f16(, ) declare @llvm.aarch64.sve.frecps.x.nxv4f32( , ) declare @llvm.aarch64.sve.frecps.x.nxv2f64(, ) @@ -218,7 +418,10 @@ declare @llvm.aarch64.sve.frsqrts.x.nxv2f64( @llvm.fma.nxv2f64(, , ) declare @llvm.fma.nxv4f32(, , ) +declare @llvm.fma.nxv2f32(, , ) declare @llvm.fma.nxv8f16(, , ) +declare @llvm.fma.nxv4f16(, , ) +declare @llvm.fma.nxv2f16(, , ) ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2 diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll index e9e34ada83d19..43e6adf1f94a6 100644 --- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Verify that DAG combine rules for LD1 + sext/zext don't apply when the diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll index 48fc3ccb48bbc..4230a7fa28716 100644 --- a/llvm/test/CodeGen/AArch64/sve-gep.ll +++ b/llvm/test/CodeGen/AArch64/sve-gep.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning define * @scalar_of_scalable_1(* %base) { diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll index e37fd8346780b..a2a4a8e1ba74c 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @test_lane0_16xi8( %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll index b721cc7b00c55..2e9620f113fa0 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll index 35b6f52884189..e2bfff75b72bb 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-pred.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning define @add_i8( %pg, %a, %b) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll index a1b4f73d09ba2..d70e817085500 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @add_i64( %a, %b) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll index 36f83284d45f5..1b7f131f7928e 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-div-pred.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @sdiv_i32( %pg, %a, %b) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-imm.ll index 13c634e8fc1fc..3694a58a2dead 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-int-log-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-log-imm.ll index 617b649a06b3a..13d98b526f2cd 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-log-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-log-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll index 8d97844da2be8..b1e7eb982cb9e 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @and_pred_i8( %pg, %a, %b) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-log.ll b/llvm/test/CodeGen/AArch64/sve-int-log.ll index 8c286ad4c8d5b..fa4cc3c212001 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-log.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning define @and_d( %a, %b) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll index 4b6a9365b9d09..bcd66346d8f7a 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-mad-pred.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @mad_i8( %pg, %a, %b, %c) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll index b6f665274f4e9..65f2ba3d8f4af 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @mul_i8( %pg, %a, %b) { diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll index 116e76f419dfd..360e22d4a777e 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning define i64 @saddv_i8( %pg, %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll index af3572a5c9e9b..191fddacffd1d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll @@ -1,6 +1,7 @@ ; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck --check-prefix OPT %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define i1 @ptest_any1( %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll index d225396b6e73a..723ffd8c17330 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -1,6 +1,7 @@ ; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck --check-prefix OPT %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @reinterpret_test_h( %a) { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll index 3262de23baf47..71bf76735f6a4 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll index 4c397c9db1577..11573a790c585 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll index 81d7fc8006162..7c1ebcb903251 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll index 40826c26af0d0..179a7f1e37471 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll index 8a60a2667c28e..640f8152d2768 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll index 2f2b8f5af3b17..218d9fcad52e0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll index 788592c131bc8..9df1635e41533 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll index 5637a6982c2ab..eacd75655c28a 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll index 27aa5622160d4..339862ac0a711 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll index 6cfbddf031daf..73d4b6d1b1c7d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll index 9e17b470037a2..3cfc7c984075f 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll index d591614b964ca..03fae3ff8fe0b 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll index 6534f32cfbb10..1f0828a7cda64 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll index e9ad9f12b5378..723a46251850a 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll index 1bcae7135fd36..e6f63284ee49e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll index fff5290af8e32..64c0d2f581101 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll index 194ffe383340c..6ba496fae5371 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll index fee3edaa74457..0cadd2e029d52 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll index d1d1e3c55507a..5f5d0dda61c06 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll index b03e1a25f5bf6..0b7b009326cf8 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll index cf38473237340..0fc340eddeab7 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll index 3818c6178faad..64cb89edd679e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll index 87580c92e710c..7cf641a264274 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll index 856d29aec7a48..f6afdbe2b5de5 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll index f877d24111da5..4fded79d9cc1a 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll index 7feba87aecc2c..7948daadf4346 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; PRFB , , [, .S, ] -> 32-bit indexes diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll index a9c2110aa2f2e..1b30a551f6d59 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; PRFB , , [.S{, #}] -> 32-bit element diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll index a9abad9f1fddf..4a323a2cfc40d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; PRFB , , [.S{, #}] -> 32-bit element, imm = 0, 1, ..., 31 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll index 8b70778e867dd..99e79a6408cc0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll index 18d7f3515756a..0a6842921cbe3 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; SMAX diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll index 50bb76d2ffb9e..5b0d2faf923e2 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll index bc73bd49b0fd1..8a5d669e4241e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll index b34f8f4269baf..b20c6a8bd9c51 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll index 2572262110848..80edb71cc50d1 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll index 2aaf222504a70..7d60df04493cd 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll index e66b84a741033..62b710016af63 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll index e5d200945098c..a594686c8918f 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll @@ -2,6 +2,7 @@ ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll index 1a4a25c83b347..f5607f91a3e44 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm,+bf16 -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll index 94634f47cfa36..27ae95cced19e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll index e56ebcbde8e6a..951c80ac8e9d4 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll index 31f5c6797bbc5..be1c03a754fee 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Range testing for the immediate in the reg+imm(mulvl) addressing diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll index 5ff497c20ef5c..0c8031d1852ad 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll index deca4dee84179..0c56dac0276c0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll index 44cba612adba3..37433ab7329c6 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f32mm -asm-verbose=0 < %s -o - 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @fmmla_s( %r, %a, %b) nounwind { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll index 024e1b7bc3ce9..caf406f2a5de0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s -o - 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning define @fmmla_d( %r, %a, %b) nounwind { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll index 32627f79b8b21..dfdfb5c7a7081 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+i8mm -asm-verbose=0 < %s -o - 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @smmla( %r, %a, %b) nounwind { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll index 433a1cdfd8e91..b248d209f44aa 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll index 22d4bbe73bcdb..b8cb4c3bf18b4 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll index 49b333460f01a..b291c6e5b2577 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll index ad615d76e903b..e1be2541c1152 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll index 3c1b8cbcd46b7..d0f1ab9255fc5 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll index 64987847eb7a9..ae3fa5c419ba0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll index 9f679aa6dc4f8..8e0fb70875a0a 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll index 26ebf38cf85cd..75e0ff37da1c7 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll index 5b2612d8fa39b..2bf2848a64bb0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll index c0000a3d36780..8b40e5d76556f 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll index 862b6fb2c0a91..3f3cbbb92f2d2 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll index 7b632c8be727d..34625365fe1a8 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll index cd441e69efece..bdc64f62ff601 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll index 28341cb522b85..df0ea818f1dcb 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll index 615caf18d3286..8044c2c3e0f77 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll index dd884d5577f09..d160ae03864c0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll index d9b193382bc3f..50bbbd4fed4a3 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll index 98362836dd0bc..8801e1cd7aea2 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll index e4e0ed8738f4f..3eb838ab91b38 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll index 1af4ce746c8a3..72d1f35cfe0f1 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll index c541415f0c3bf..54c35adc056b1 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll @@ -2,6 +2,7 @@ ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll index 8d8743eb58c87..1da557f5ea47d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; NOTE: invalid, upper and lower bound immediate values of the reg+imm diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll index 98967e3418bb9..d64d1dd19bd4f 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index bda72ab2ab3a1..d26ab2980cccc 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll index b1ef24ddbe655..716533155c6f6 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll index f774e2fc7a58d..a0d91ff82f4b7 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll index 715d68f69217b..78f2a7f5ed92e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll index 13bd864c1f23f..abb7513b2a587 100644 --- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; LD1B diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll index c709cfa82373e..f5047a7bcbaff 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll index c4c94aa7a3af1..667798754d9e1 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll index 8f86aee14dbe0..b92aa45fc01cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll index 106f426fcf4a4..a1a12c6c11c74 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll index 2a6c1ec532f9b..6065dbdd2765e 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Range checks: for all the instruction tested in this file, the diff --git a/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-reg.ll index d3bd88a1f9d42..03a0ce77fe6d5 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-reg.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; 2-lane contiguous load/stores diff --git a/llvm/test/CodeGen/AArch64/sve-pred-log.ll b/llvm/test/CodeGen/AArch64/sve-pred-log.ll index 1411900130607..c25fbdceccb46 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-log.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning define @vselect_16( %Pg, %Pn, %Pd) { diff --git a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll index 91c252585c662..6917d1d549ab4 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Range checks: for all the instruction tested in this file, the diff --git a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll index b78dfa41eb098..e066acef2c5a2 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; 2-lane non-temporal load/stores diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll index fccb0dc11b04c..dccfd0afd0276 100644 --- a/llvm/test/CodeGen/AArch64/sve-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-select.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; Integer vector select diff --git a/llvm/test/CodeGen/AArch64/sve-setcc.ll b/llvm/test/CodeGen/AArch64/sve-setcc.ll index 1b0865b1bc3ed..3dbe0eb422832 100644 --- a/llvm/test/CodeGen/AArch64/sve-setcc.ll +++ b/llvm/test/CodeGen/AArch64/sve-setcc.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; Ensure we use the inverted CC result of SVE compare instructions when branching. diff --git a/llvm/test/CodeGen/AArch64/sve-split-load.ll b/llvm/test/CodeGen/AArch64/sve-split-load.ll index a76b27e635574..5e64ff9812860 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-load.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; LOAD +; UNPREDICATED -define @load_promote_4i8(* %a) { -; CHECK-LABEL: load_promote_4i8: +define @load_promote_4i16(* %a) { +; CHECK-LABEL: load_promote_4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] @@ -53,3 +53,82 @@ define @load_split_16i64(* %a) { %load = load , * %a ret %load } + +; MASKED + +define @masked_load_promote_2i32( *%a, %pg) { +; CHECK-LABEL: masked_load_promote_2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv2i32( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_32i8( *%a, %pg) { +; CHECK-LABEL: masked_load_split_32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32i8( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_32i16( *%a, %pg) { +; CHECK-LABEL: 
masked_load_split_32i16: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p2.b +; CHECK-NEXT: zip1 p3.b, p0.b, p2.b +; CHECK-NEXT: zip2 p0.b, p0.b, p2.b +; CHECK-NEXT: ld1h { z0.h }, p3/z, [x0] +; CHECK-NEXT: zip1 p3.b, p1.b, p2.b +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: zip2 p0.b, p1.b, p2.b +; CHECK-NEXT: ld1h { z2.h }, p3/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32i16( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_8i32( *%a, %pg) { +; CHECK-LABEL: masked_load_split_8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p2.h, p0.h, p1.h +; CHECK-NEXT: zip2 p0.h, p0.h, p1.h +; CHECK-NEXT: ld1w { z0.s }, p2/z, [x0] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv8i32( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_8i64( *%a, %pg) { +; CHECK-LABEL: masked_load_split_8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p2.h, p0.h, p1.h +; CHECK-NEXT: zip2 p0.h, p0.h, p1.h +; CHECK-NEXT: zip1 p3.s, p2.s, p1.s +; CHECK-NEXT: zip2 p2.s, p2.s, p1.s +; CHECK-NEXT: ld1d { z0.d }, p3/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p2/z, [x0, #1, mul vl] +; CHECK-NEXT: zip1 p2.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv8i64( *%a, i32 1, %pg, undef) + ret %load +} + +declare @llvm.masked.load.nxv32i8(*, i32, , ) + +declare @llvm.masked.load.nxv32i16(*, i32, , ) + +declare @llvm.masked.load.nxv2i32(*, i32, , ) +declare @llvm.masked.load.nxv8i32(*, i32, , ) + +declare @llvm.masked.load.nxv8i64(*, i32, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-split-store.ll b/llvm/test/CodeGen/AArch64/sve-split-store.ll index 2fba0404ef348..a3a9b8b53ec70 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-split-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-store.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; UNPREDICATED + define void @store_promote_4i8( %data, * %a) { ; CHECK-LABEL: store_promote_4i8: ; CHECK: // %bb.0: @@ -51,3 +53,82 @@ define void @store_split_16i64( %data, * % store %data, * %a ret void } + +; MASKED + +define void @masked_store_promote_2i8( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_promote_2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2i8( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_32i8( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z1.b }, p1, [x0, #1, mul vl] +; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32i8( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_32i16( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_32i16: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p2.b +; CHECK-NEXT: zip2 p3.b, p1.b, p2.b +; CHECK-NEXT: zip1 p1.b, p1.b, p2.b +; CHECK-NEXT: st1h { z3.h }, p3, [x0, #3, mul vl] +; CHECK-NEXT: zip2 p3.b, p0.b, p2.b +; CHECK-NEXT: zip1 p0.b, p0.b, p2.b +; CHECK-NEXT: st1h { z2.h }, p1, [x0, #2, mul vl] +; CHECK-NEXT: st1h { z1.h }, p3, [x0, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32i16( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_8i32( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip2 p2.h, p0.h, p1.h +; CHECK-NEXT: zip1 p0.h, p0.h, p1.h +; CHECK-NEXT: st1w { z1.s }, p2, [x0, #1, mul vl] +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void 
@llvm.masked.store.nxv8i32( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_8i64( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip2 p2.h, p0.h, p1.h +; CHECK-NEXT: zip1 p0.h, p0.h, p1.h +; CHECK-NEXT: zip2 p3.s, p2.s, p1.s +; CHECK-NEXT: zip1 p2.s, p2.s, p1.s +; CHECK-NEXT: st1d { z2.d }, p2, [x0, #2, mul vl] +; CHECK-NEXT: zip2 p2.s, p0.s, p1.s +; CHECK-NEXT: zip1 p0.s, p0.s, p1.s +; CHECK-NEXT: st1d { z3.d }, p3, [x0, #3, mul vl] +; CHECK-NEXT: st1d { z1.d }, p2, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv8i64( %data, *%a, i32 1, %pg) + ret void +} + +declare void @llvm.masked.store.nxv2i8(, *, i32, ) +declare void @llvm.masked.store.nxv32i8(, *, i32, ) + +declare void @llvm.masked.store.nxv32i16(, *, i32, ) + +declare void @llvm.masked.store.nxv8i32(, *, i32, ) + +declare void @llvm.masked.store.nxv8i64(, *, i32, ) diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll index 2e4f190145454..1d81a586826f1 100644 --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning ; ST1B diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll index 50ce4d9660879..876003a3962c6 100644 --- a/llvm/test/CodeGen/AArch64/sve-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; For all the functions below should the operation is a nop diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index cd7ecbeb5ca13..7a53e5879c908 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ;; Splats of legal integer vector types diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll b/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll index 2084b6a23adb8..4df6c16b71f14 100644 --- a/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s 2>%t |FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning declare i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/AArch64/sve-vscale.ll b/llvm/test/CodeGen/AArch64/sve-vscale.ll index aed8f223c4715..61bafbd11a2aa 100644 --- a/llvm/test/CodeGen/AArch64/sve-vscale.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale.ll @@ -2,6 +2,7 @@ ; RUN: opt -mtriple=aarch64 -codegenprepare -S < %s | llc -mtriple=aarch64 -mattr=+sve -asm-verbose=0 | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning ; diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll index 4cb2cb2c43277..2598734392720 100644 --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning define @sel_8_positive( %p) { diff --git a/llvm/test/CodeGen/AArch64/sve-zeroinit.ll b/llvm/test/CodeGen/AArch64/sve-zeroinit.ll index 56ac0111f42b8..de2e4885f7883 100644 --- a/llvm/test/CodeGen/AArch64/sve-zeroinit.ll +++ b/llvm/test/CodeGen/AArch64/sve-zeroinit.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll index ff0de0d1f6090..c815220ef97d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll @@ -1,294 +1,182 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s ; Make sure we don't violate the constant bus restriction -; FIXME: Make this test isa output when div.fmas works. 
- define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: fmul_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] - ; GFX9: $vgpr0 = COPY [[FMUL]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fmul_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] - ; GFX10: $vgpr0 = COPY [[FMUL]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fmul_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_mul_f32_e32 v0, s2, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fmul_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mul_f32_e64 v0, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = fmul float %src0, %src1 ret float %result } define amdgpu_ps float @fmul_ss(float inreg %src) { - ; GFX9-LABEL: name: fmul_ss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0 = COPY [[FMUL]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fmul_ss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]] - ; GFX10: $vgpr0 = COPY [[FMUL]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fmul_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mul_f32_e64 v0, s2, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fmul_ss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mul_f32_e64 v0, s2, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = fmul float %src, %src ret float %result } ; Ternary operation with 3 different SGPRs define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) { - ; GFX9-LABEL: name: fma_s_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_s_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: 
fma_s_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_fma_f32 v0, s2, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_s_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_fma_f32 v0, s3, s2, v0 +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src0, float %src1, float %src2) ret float %result } ; Ternary operation with 3 identical SGPRs define amdgpu_ps float @fma_sss(float inreg %src) { - ; GFX9-LABEL: name: fma_sss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_sss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_sss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_fma_f32 v0, s2, s2, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_sss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s2, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src, float %src, float %src) ret float %result } ; src0/1 are same SGPR define amdgpu_ps float @fma_ss_s(float inreg 
%src01, float inreg %src2) { - ; GFX9-LABEL: name: fma_ss_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_ss_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_ss_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_fma_f32 v0, s2, s2, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_ss_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src01, float %src01, float %src2) ret float %result } ; src1/2 are same SGPR define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) { - ; GFX9-LABEL: name: fma_s_ss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY 
[[COPY1]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_s_ss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_s_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_fma_f32 v0, s2, v0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_s_ss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s3, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src0, float %src12, float %src12) ret float %result } ; src0/2 are same SGPR define amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) { - ; GFX9-LABEL: name: fma_ss_s_same_outer - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_ss_s_same_outer - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_ss_s_same_outer: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_fma_f32 v0, s2, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_ss_s_same_outer: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s3, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src02, float %src1, float %src02) ret float %result } define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: fcmp_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]] - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fcmp_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), 
[[COPY]](s32), [[COPY2]] - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fcmp_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fcmp_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cmp_eq_f32_e64 s0, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 +; GFX10-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %src0, %src1 %result = select i1 %cmp, float 1.0, float 0.0 ret float %result } define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: select_vcc_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: select_vcc_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: 
[[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: select_vcc_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: select_vcc_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v2, s3 +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, s2, vcc_lo +; GFX10-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %cmp0, %cmp1 %result = select i1 %cmp, float %src0, float %src1 ret float %result } define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: select_vcc_fneg_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX9: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]] - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: 
SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: select_vcc_fneg_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX10: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]] - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: select_vcc_fneg_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -v3, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: select_vcc_fneg_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v2, s2 +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, s3, -v2, vcc_lo +; GFX10-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %cmp0, %cmp1 %neg.src0 = fneg float %src0 %result = select i1 %cmp, float %neg.src0, float %src1 @@ -297,122 +185,73 @@ define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inre ; Constant bus used by vcc define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) { - ; GFX9-LABEL: name: amdgcn_div_fmas_sss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = 
COPY [[C]](s32) - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]] - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1) - ; GFX9: $vgpr0 = COPY [[INT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: amdgcn_div_fmas_sss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $vgpr0 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]] - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1) - ; GFX10: $vgpr0 = COPY [[INT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: amdgcn_div_fmas_sss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: s_nop 2 +; GFX9-NEXT: v_div_fmas_f32 v0, v0, v0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: amdgcn_div_fmas_sss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_div_fmas_f32 v0, s2, s2, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %vcc = fcmp oeq float %cmp.src, 0.0 %result = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %vcc) ret float %result } define amdgpu_ps float 
@class_s_s(float inreg %src0, i32 inreg %src1) { - ; GFX9-LABEL: name: class_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: class_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: class_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: 
class_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cmp_class_f32_e64 s0, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 +; GFX10-NEXT: ; return to shader part epilog %class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1) %result = select i1 %class, float 1.0, float 0.0 ret float %result } define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: div_scale_s_s_true - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1 - ; GFX9: $vgpr0 = COPY [[INT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: div_scale_s_s_true - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1 - ; GFX10: $vgpr0 = COPY [[INT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: div_scale_s_s_true: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_div_scale_f32 v0, s[0:1], s2, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: div_scale_s_s_true: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s3, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, 
float %src1, i1 true) %result = extractvalue { float, i1 } %div.scale, 0 ret float %result } define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: div_scale_s_s_false - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0 - ; GFX9: $vgpr0 = COPY [[INT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: div_scale_s_s_false - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0 - ; GFX10: $vgpr0 = COPY [[INT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: div_scale_s_s_false: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: div_scale_s_s_false: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_div_scale_f32 v0, s0, s3, s3, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false) %result = extractvalue { float, i1 } %div.scale, 0 ret float %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index 1622edace5b22..d5d991288ccee 
100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -590,21 +590,21 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 ; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:2 ; SI-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64 offset:3 -; SI-NEXT: s_movk_i32 s6, 0xff +; SI-NEXT: s_movk_i32 s0, 0xff ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_and_b32_e32 v1, s6, v2 +; SI-NEXT: v_and_b32_e32 v1, s0, v2 ; SI-NEXT: s_waitcnt vmcnt(2) -; SI-NEXT: v_and_b32_e32 v2, s6, v3 +; SI-NEXT: v_and_b32_e32 v2, s0, v3 ; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_and_b32_e32 v3, s6, v4 +; SI-NEXT: v_and_b32_e32 v3, s0, v4 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_and_b32_e32 v4, s6, v0 +; SI-NEXT: v_and_b32_e32 v4, s0, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v2 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v3 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v3, v4 -; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -839,21 +839,21 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no ; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 ; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:2 ; SI-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64 offset:3 -; SI-NEXT: s_movk_i32 s6, 0xff +; SI-NEXT: s_movk_i32 s0, 0xff ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_and_b32_e32 v1, s6, v2 +; SI-NEXT: v_and_b32_e32 v1, s0, v2 ; SI-NEXT: s_waitcnt vmcnt(2) -; SI-NEXT: v_and_b32_e32 v2, s6, v3 +; SI-NEXT: v_and_b32_e32 v2, s0, v3 ; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_and_b32_e32 v3, s6, v4 +; SI-NEXT: v_and_b32_e32 v3, s0, v4 ; SI-NEXT: 
s_waitcnt vmcnt(0) -; SI-NEXT: v_and_b32_e32 v4, s6, v0 +; SI-NEXT: v_and_b32_e32 v4, s0, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v2 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v3 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v3, v4 -; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index 9c47fab05aa05..2695952bfd193 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -141,18 +141,18 @@ define void @constrained_if_register_class() { ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 ; CHECK-NEXT: s_cbranch_scc0 BB4_6 ; CHECK-NEXT: ; %bb.1: ; %bb2 -; CHECK-NEXT: s_getpc_b64 s[6:7] -; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+4 -; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; CHECK-NEXT: s_mov_b32 s4, -1 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, s6 -; CHECK-NEXT: v_mov_b32_e32 v1, s7 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: flat_load_dword v0, v[0:1] -; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 +; CHECK-NEXT: s_mov_b32 s4, -1 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0 ; CHECK-NEXT: s_xor_b64 s[8:9], vcc, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir index a9e1124d10266..e4bd1b43e880f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir @@ -70,7 +70,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: %src0:vgpr_32 = COPY $vgpr0 ; GFX6: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: %ineg:vgpr_32, dead %4:sreg_64_xexec = V_SUB_I32_e64 %zero, %src0, 0, implicit $exec + ; GFX6: %ineg:vgpr_32, dead %4:sreg_64_xexec = V_SUB_CO_U32_e64 %zero, %src0, 0, implicit $exec ; GFX6: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec ; GFX6: S_ENDPGM 0, implicit %smax ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index 79c9f98880335..51a116a944ad6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -18,9 +18,9 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec - ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec + ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9 ; GFX9-LABEL: name: add_s32 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 @@ -95,7 +95,7 @@ 
body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec - ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v ; GFX9: liveins: $vgpr0 @@ -152,7 +152,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v ; GFX9: liveins: $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir index f59fe2b87aaea..eaafe1285a303 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -69,9 +69,9 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 @@ -96,9 +96,9 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -177,9 +177,9 @@ body: | ; 
GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 @@ -204,9 +204,9 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 
[[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -242,9 +242,9 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 @@ -260,9 +260,9 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 @@ -279,9 +279,9 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 
[[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir index ef9ee940bcd69..ff80f873ff0af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -135,9 +135,9 @@ body: | ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], 
[[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -153,9 +153,9 @@ body: | ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -315,9 +315,9 @@ body: | ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed 
[[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -333,9 +333,9 @@ body: | ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], 
[[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -388,9 +388,9 @@ body: | ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX6: %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %18, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX6: %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %18, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -412,9 +412,9 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7: %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_ADD_I32_e64_]], %subreg.sub0, %18, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7: %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %18, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -436,9 +436,9 @@ body: | ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: 
amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -454,9 +454,9 @@ body: | ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir index b134008cfa591..bf4db71346f31 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir @@ -61,7 +61,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, 
implicit $exec + ; GFX6: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir index 94f0d2ba9e1e7..2258c1bf308c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -90,9 +90,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047 @@ -113,9 +113,9 @@ 
body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -147,9 +147,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; 
GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -168,9 +168,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -200,9 +200,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: 
[[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048 @@ -223,9 +223,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -257,9 +257,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -278,9 +278,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = 
V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -310,9 +310,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; 
GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095 @@ -333,9 +333,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -367,9 +367,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -388,9 +388,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -420,9 +420,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; 
GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097 @@ -436,9 +436,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], 
[[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097 @@ -453,9 +453,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -487,9 +487,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -502,9 +502,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, 
%subreg.sub1 ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -518,9 +518,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -616,9 +616,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, 
implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095 @@ -639,9 +639,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store 
seq_cst 8) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -673,9 +673,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -694,9 +694,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir index a1c853f7e5e91..e232e5032c534 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -123,9 +123,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; 
GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047 @@ -180,9 +180,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -234,9 +234,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], 
killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048 @@ -257,9 +257,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load 
store seq_cst 4, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -301,9 +301,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -322,9 +322,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; 
GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -365,9 +365,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095 @@ -388,9 +388,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -432,9 +432,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = 
V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -453,9 +453,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -497,9 +497,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -513,9 +513,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], 
%subreg.sub0, %10, %subreg.sub1 ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097 @@ -530,9 +530,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -575,9 +575,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], 
[[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -590,9 +590,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, 
addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -606,9 +606,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -736,9 +736,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095 @@ -759,9 +759,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -803,9 +803,9 @@ body: | ; 
GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -824,9 +824,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], 
[[COPY3]], 0, implicit $exec + ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir index 0c922c04c9bae..16e01429f68c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -56,7 +56,7 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir new file mode 100644 index 0000000000000..1fd95b5b7947a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir @@ -0,0 +1,744 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select %s -o - | FileCheck 
-check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX10 %s + +--- +name: test_freeze_s1_vgpr_to_vgpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_s1_vgpr_to_vgpr + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_vgpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s1) = G_TRUNC %0(s32) + %2:vgpr(s1) = G_FREEZE %1 + %3:vgpr(s32) = G_ANYEXT %2(s1) + $vgpr0 = COPY %3(s32) + +... + +--- +name: test_freeze_s1_vgpr_to_agpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_s1_vgpr_to_agpr + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $agpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_agpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $agpr0 = COPY [[COPY]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s1) = G_TRUNC %0(s32) + %2:vgpr(s1) = G_FREEZE %1 + %3:vgpr(s32) = G_ANYEXT %2(s1) + $agpr0 = COPY %3(s32) + +... 
+ +--- +name: test_freeze_s1_vcc +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: test_freeze_s1_vcc + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] + ; GFX6: S_ENDPGM 0, implicit [[COPY2]] + ; GFX10-LABEL: name: test_freeze_s1_vcc + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[V_CMP_EQ_U32_e64_]] + ; GFX10: S_ENDPGM 0, implicit [[COPY2]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vcc(s1) = G_ICMP intpred(eq), %0(s32), %1 + %3:vcc(s1) = G_FREEZE %2 + S_ENDPGM 0, implicit %3(s1) + +... + +--- +name: test_freeze_s16_vgpr_to_vgpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_s16_vgpr_to_vgpr + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s16_vgpr_to_vgpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0(s32) + %2:vgpr(s16) = G_FREEZE %1 + %3:vgpr(s32) = G_ANYEXT %2(s16) + $vgpr0 = COPY %3(s32) + +... 
+ +--- +name: test_freeze_s32_vgpr_to_vgpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_s32_vgpr_to_vgpr + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_vgpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_FREEZE %0 + $vgpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_sgpr_to_sgpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_sgpr + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: $sgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_sgpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: $sgpr0 = COPY [[COPY]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_FREEZE %0 + $sgpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_sgpr_to_vgpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_vgpr + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_vgpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_FREEZE %0 + $vgpr0 = COPY %1(s32) + +... 
+ +--- +name: test_freeze_s32_vgpr_to_agpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_s32_vgpr_to_agpr + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $agpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_agpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $agpr0 = COPY [[COPY]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_FREEZE %0 + $agpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_sgpr_to_agpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0 + ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_agpr + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: $agpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_agpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: $agpr0 = COPY [[COPY]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_FREEZE %0 + $agpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_agpr_to_vgpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $agpr0 + ; GFX6-LABEL: name: test_freeze_s32_agpr_to_vgpr + ; GFX6: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_agpr_to_vgpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:agpr(s32) = COPY $agpr0 + %1:agpr(s32) = G_FREEZE %0 + $vgpr0 = COPY %1(s32) + +... 
+ +--- +name: test_freeze_s32_agpr_to_agpr +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $agpr0 + ; GFX6-LABEL: name: test_freeze_s32_agpr_to_agpr + ; GFX6: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 + ; GFX6: $agpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s32_agpr_to_agpr + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 + ; GFX10: $agpr0 = COPY [[COPY]] + %0:agpr(s32) = COPY $agpr0 + %1:agpr(s32) = G_FREEZE %0 + $agpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s64 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_s64 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s64 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(s64) + +... + +--- +name: test_freeze_s128 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-LABEL: name: test_freeze_s128 + ; GFX6: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s128 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(s128) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(s128) + +... 
+ +--- +name: test_freeze_256 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-LABEL: name: test_freeze_256 + ; GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_256 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(s256) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(s256) + +... + +--- +name: test_freeze_s512 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-LABEL: name: test_freeze_s512 + ; GFX6: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_s512 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:vgpr(s512) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(s512) + +... 
+ +--- +name: test_freeze_v2s32 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v2s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<2 x s32>) + +... + +--- +name: test_freeze_v3s32 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; GFX6-LABEL: name: test_freeze_v3s32 + ; GFX6: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v3s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(<3 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x s32>) + +... + +--- +name: test_freeze_v4s32 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-LABEL: name: test_freeze_v4s32 + ; GFX6: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v4s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(<4 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x s32>) + +... 
+ +--- +name: test_freeze_v5s32 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX6-LABEL: name: test_freeze_v5s32 + ; GFX6: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v5s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:vgpr(<5 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x s32>) + +... + +--- +name: test_freeze_v8s32 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-LABEL: name: test_freeze_v8s32 + ; GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v8s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(<8 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x s32>) + +... 
+ +--- +name: test_freeze_v16s32 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-LABEL: name: test_freeze_v16s32 + ; GFX6: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v16s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:vgpr(<16 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x s32>) + +... + +--- +name: test_freeze_v2s16 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v2s16 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_FREEZE %0 + $vgpr0 = COPY %1(<2 x s16>) + +... 
+ +--- +name: test_freeze_v4s16 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v4s16 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<4 x s16>) + +... + +--- +name: test_freeze_v6s16 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; GFX6-LABEL: name: test_freeze_v6s16 + ; GFX6: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v6s16 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + %0:vgpr(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(<6 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x s16>) + +... + +--- +name: test_freeze_v8s16 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-LABEL: name: test_freeze_v8s16 + ; GFX6: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v8s16 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + %0:vgpr(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(<8 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x s16>) + +... 
+ +--- +name: test_freeze_v2s64 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-LABEL: name: test_freeze_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_v2s64 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + %0:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vgpr(<2 x s64>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x s64>) + +... + +--- +name: test_freeze_p0 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_p0 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p0) + +... + +--- +name: test_freeze_p1 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_p1 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p1) + +... 
+ +--- +name: test_freeze_p2 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_p2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(p2) = COPY $vgpr0 + %1:vgpr(p2) = G_FREEZE %0 + $vgpr0 = COPY %1(p2) + +... + +--- +name: test_freeze_p3 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_p3 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = G_FREEZE %0 + $vgpr0 = COPY %1(p3) + +... + +--- +name: test_freeze_p4 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_p4 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(p4) = COPY $vgpr0_vgpr1 + %1:vgpr(p4) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p4) + +... + +--- +name: test_freeze_p5 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: test_freeze_p5 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $vgpr0 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: $vgpr0 = COPY [[COPY]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p5) = G_FREEZE %0 + $vgpr0 = COPY %1(p5) + +... 
+ +--- +name: test_freeze_p999 +alignment: 1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: test_freeze_p999 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-LABEL: name: test_freeze_p999 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: $vgpr0_vgpr1 = COPY [[COPY]] + %0:vgpr(p999) = COPY $vgpr0_vgpr1 + %1:vgpr(p999) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p999) + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir index ad2f418980aaa..d31d8ac361ece 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -239,9 +239,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr 
:: (load seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 @@ -254,9 +254,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -288,9 +288,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir index deb59ffa10c40..0d225dc7dab62 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -341,9 +341,9 @@ body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -361,9 +361,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -381,9 +381,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], 
%subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 @@ -396,9 +396,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -450,9 +450,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 
= COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir index b7283ecfade89..ee72309b16792 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir @@ -290,7 +290,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: 
$m0 = S_MOV_B32 -1 ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index e0783e9b636d8..d45ef60f5a36c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -738,9 +738,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 @@ -753,9 +753,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 @@ -774,9 +774,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: 
[[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -808,9 +808,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 @@ -823,9 +823,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], 
%subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 @@ -844,9 +844,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -878,9 +878,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ -893,9 +893,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit 
$exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ -908,9 +908,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 @@ -924,9 +924,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = 
V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -958,9 +958,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; 
GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 @@ -973,9 +973,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 @@ -988,9 +988,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], 
[[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 @@ -1004,9 +1004,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1038,9 +1038,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: 
[[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 @@ -1053,9 +1053,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 @@ -1074,9 +1074,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1108,9 +1108,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 @@ -1123,9 +1123,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 @@ -1138,9 +1138,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 @@ -1154,9 +1154,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 
[[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1188,9 +1188,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1203,9 +1203,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: 
%9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1218,9 +1218,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; 
GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 @@ -1234,9 +1234,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1268,9 +1268,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 @@ -1283,9 +1283,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 @@ -1298,9 +1298,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 @@ -1314,9 +1314,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1348,9 +1348,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1363,9 +1363,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; 
GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1378,9 +1378,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 @@ -1394,9 +1394,9 @@ body: | ; 
GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1428,9 +1428,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = 
V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 @@ -1443,9 +1443,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 @@ -1458,9 +1458,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, 
implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 @@ -1474,9 +1474,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 
implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1508,9 +1508,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -1523,9 +1523,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -1538,9 +1538,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 @@ -1554,9 +1554,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1588,9 +1588,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 @@ -1603,9 +1603,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 @@ -1618,9 +1618,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec 
- ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 @@ -1634,9 +1634,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir index eb3de7b1e7658..a9c0560369548 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -867,9 +867,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 @@ -882,9 +882,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead 
%11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 @@ -947,9 +947,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 
implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 @@ -962,9 +962,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 @@ -983,9 +983,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, 
%subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1017,9 +1017,9 @@ body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1037,9 +1037,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1057,9 +1057,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, 
implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1072,9 +1072,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 @@ -1117,9 +1117,9 @@ body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: 
%14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1137,9 +1137,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1157,9 +1157,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1172,9 +1172,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, 
%subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 @@ -1237,9 +1237,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1252,9 +1252,9 @@ body: | ; GFX8: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 @@ -1273,9 +1273,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead 
%11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1329,9 +1329,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1344,9 +1344,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1359,9 +1359,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: 
[[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 @@ -1375,9 +1375,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1409,9 +1409,9 @@ body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1429,9 +1429,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1449,9 +1449,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -1464,9 +1464,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead 
%11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 @@ -1485,9 +1485,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1519,9 +1519,9 @@ body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec 
= V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1539,9 +1539,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1559,9 +1559,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -1574,9 +1574,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -1595,9 +1595,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1651,9 +1651,9 @@ body: | ; GFX7-FLAT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 @@ -1666,9 +1666,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit 
$exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 @@ -1681,9 +1681,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 @@ -1697,9 +1697,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1753,9 +1753,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, 
%subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 @@ -1768,9 +1768,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 @@ -1783,9 +1783,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; 
GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 @@ -1799,9 +1799,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1833,9 +1833,9 @@ 
body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1853,9 +1853,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec 
= V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1873,9 +1873,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -1888,9 +1888,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -1903,9 +1903,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: 
[[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 @@ -1919,9 +1919,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 @@ -1953,9 +1953,9 @@ body: | ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1973,9 +1973,9 @@ body: | ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -1993,9 +1993,9 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2008,9 +2008,9 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead 
%11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2023,9 +2023,9 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 @@ -2039,9 +2039,9 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir index 1382434fe0a74..a80ad208b5898 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -568,7 +568,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] @@ -640,7 +640,7 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] @@ -655,7 +655,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] @@ -682,7 +682,7 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] @@ -697,7 +697,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] @@ -762,7 +762,7 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index 2a93510f237ba..162dd01de66d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -205,7 +205,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 @@ -278,7 +278,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 
2048, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 @@ -312,7 +312,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 @@ -348,7 +348,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 @@ -384,7 +384,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 @@ -418,7 +418,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 @@ -454,7 +454,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 @@ -490,7 +490,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 @@ -526,7 +526,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 @@ -562,7 +562,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 @@ -598,7 +598,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 @@ -634,7 +634,7 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 @@ -828,7 +828,7 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY 
[[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir index a33e4c3b313f4..12e75bb32d39c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -61,8 +61,8 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 %3, [[COPY2]], 0, implicit $exec + ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %4 ; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -102,8 +102,8 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 %3, [[COPY2]], 0, implicit $exec + ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %4, implicit %3 ; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -146,8 +146,8 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: %3:vgpr_32, dead 
%6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 %3, [[COPY2]], 0, implicit $exec + ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %4 ; GFX9-LABEL: name: add_p3_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -190,8 +190,8 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 %3, [[COPY2]], 0, implicit $exec + ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %4 ; GFX9-LABEL: name: add_p5_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -234,8 +234,8 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], %3, 0, implicit $exec + ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %4 ; GFX9-LABEL: name: add_p3_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -278,8 +278,8 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
$vgpr2 - ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], %3, 0, implicit $exec + ; GFX8: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %4 ; GFX9-LABEL: name: add_p5_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir index f2a30dd5b0e64..98fdcac99d4aa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir @@ -91,9 +91,9 @@ body: | ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_vgpr_vgpr ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -102,9 +102,9 @@ body: | ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = 
COPY [[COPY1]].sub0 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX8: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_vgpr_vgpr ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -113,9 +113,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], 
%subreg.sub0, %8, %subreg.sub1 ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -124,9 +124,9 @@ body: | ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF @@ -136,9 +136,9 @@ body: | ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, 
%subreg.sub1 + ; GFX10-WAVE32: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -162,9 +162,9 @@ body: | ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_sgpr_vgpr ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 @@ -173,9 +173,9 @@ body: | ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, 
implicit $exec - ; GFX8: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX8: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_sgpr_vgpr ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 @@ -184,9 +184,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 @@ -195,9 +195,9 @@ body: | ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY1]].sub0 ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF @@ -207,9 +207,9 @@ body: | ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], 
[[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -270,12 +270,12 @@ body: | ; GFX6-LABEL: name: gep_p3_vgpr_vgpr ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %2 ; GFX8-LABEL: name: gep_p3_vgpr_vgpr ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: gep_p3_vgpr_vgpr ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -311,12 +311,12 @@ body: | ; GFX6-LABEL: name: gep_p3_sgpr_vgpr ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %2 ; GFX8-LABEL: name: gep_p3_sgpr_vgpr ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: gep_p3_sgpr_vgpr ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 @@ -509,9 
+509,9 @@ body: | ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p999_vgpr_vgpr ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -520,9 +520,9 @@ body: | ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX8: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p999_vgpr_vgpr ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -531,9 +531,9 @@ body: | ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX9: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -542,9 +542,9 @@ body: | ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; 
GFX10-WAVE64: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF @@ -554,9 +554,9 @@ body: | ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index 86026edb25725..f961ba3b65495 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -771,9 +771,9 @@ body: | ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) ; GFX8-LABEL: name: store_flat_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -786,9 +786,9 @@ body: | ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec 
+ ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) ; GFX9-LABEL: name: store_flat_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -807,9 +807,9 @@ body: | ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir index 87ccd3de32e49..814a051cbc7d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -904,9 +904,9 @@ body: | ; 
GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX8-LABEL: name: store_global_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 @@ -919,9 +919,9 @@ body: | ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead 
%11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX9-LABEL: name: store_global_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir index 440c34f101633..f918818117363 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -631,7 +631,7 @@ body: | ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX7: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir index de09d91eb9984..5a1d8b8cda1f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -21,9 +21,9 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 
[[S_SUB_I32_]], %7, 0, implicit $exec - ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_I32_e64 %8, [[COPY2]], 0, implicit $exec + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_CO_U32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec + ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %9 ; GFX9-LABEL: name: sub_s32 ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir index ef395a7465be9..8c774a54577a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir @@ -65,28 +65,28 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_vvv ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: uaddo_s32_s1_vvv ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], 
[[COPY1]], 0, implicit $exec - ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: uaddo_s32_s1_vvv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: uaddo_s32_s1_vvv ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit 
[[V_CNDMASK_B32_e64_]] + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32), %3:vcc(s1) = G_UADDO %0, %1 @@ -106,36 +106,36 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_vsv ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: uaddo_s32_s1_vsv ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, 
implicit $exec ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: uaddo_s32_s1_vsv ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: uaddo_s32_s1_vsv ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], 
[[COPY1]], 0, implicit $exec + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32), %3:vcc(s1) = G_UADDO %0, %1 @@ -157,36 +157,36 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_vvs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit 
[[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: uaddo_s32_s1_vvs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: uaddo_s32_s1_vvs ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, 
[[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: uaddo_s32_s1_vvs ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_I32_e64_1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32), %3:vcc(s1) = G_UADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir index 3113c7d90cf0d..6112845f89e30 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir @@ -65,28 +65,28 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_vvv ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX6: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: usubo_s32_s1_vvv ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX8: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: usubo_s32_s1_vvv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX9: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: usubo_s32_s1_vvv ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX10: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32), %3:vcc(s1) = G_USUBO %0, %1 @@ -106,36 +106,36 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_vsv ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = 
V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: usubo_s32_s1_vsv ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: usubo_s32_s1_vsv ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: usubo_s32_s1_vsv ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32), %3:vcc(s1) = G_USUBO %0, %1 @@ -157,36 +157,36 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_vvs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX6: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: usubo_s32_s1_vvs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX8: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: usubo_s32_s1_vvs ; GFX9: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX9: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: usubo_s32_s1_vvs ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_I32_e64_1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_SUB_I32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], 
[[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32), %3:vcc(s1) = G_USUBO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir index 42a3f0547343d..175144958cd99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -507,3 +507,45 @@ body: | # %5:_(s64) = G_ANYEXT %4 # $vgpr0_vgpr1 = COPY %5 # ... + +--- +name: test_add_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_add_s96 + ; GFX6: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX6: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-LABEL: name: test_add_s96 + ; GFX8: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX8: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] + ; GFX8: 
[[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX8: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-LABEL: name: test_add_s96 + ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_ADD %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index f6456cd57f01e..8b9b0e972e6f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -4,7 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- -name: test_fminnum_s32_ieee_mode_on +name: test_fmaxnum_s32_ieee_mode_on machineFunctionInfo: mode: ieee: true @@ -12,35 +12,35 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_ieee_mode_on + ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 
; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... --- -name: test_fminnum_s32_ieee_mode_off +name: test_fmaxnum_s32_ieee_mode_off machineFunctionInfo: mode: ieee: false @@ -48,280 +48,280 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_ieee_mode_off + ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM]](s32) - ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off + ; SI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off + ; VI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM]](s32) + ; GFX9: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY 
$vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... --- -name: test_fminnum_s32_nnan +name: test_fmaxnum_s32_nnan body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan + ; SI-LABEL: name: test_fmaxnum_s32_nnan ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan G_FMINNUM %0, %1 + %2:_(s32) = nnan G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s32_nnan_lhs +name: test_fmaxnum_s32_nnan_lhs body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan_lhs + ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan_lhs + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s32_nnan_rhs +name: test_fmaxnum_s32_nnan_rhs body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan_rhs + ; SI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan_rhs + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s32_nnan_lhs_rhs +name: test_fmaxnum_s32_nnan_lhs_rhs body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs + ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s64 +name: test_fmaxnum_s64 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_s64 + ; SI-LABEL: name: test_fmaxnum_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) - ; VI-LABEL: name: test_fminnum_s64 + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; VI-LABEL: name: test_fmaxnum_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) - ; GFX9-LABEL: name: test_fminnum_s64 + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = 
G_FMINNUM %0, %1 + %2:_(s64) = G_FMAXNUM %0, %1 $vgpr0_vgpr1 = COPY %2 ... --- -name: test_fminnum_s16 +name: test_fmaxnum_s16 body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s16 + ; SI-LABEL: name: test_fmaxnum_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) - ; VI-LABEL: name: test_fminnum_s16 + ; VI-LABEL: name: test_fmaxnum_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-LABEL: name: test_fminnum_s16 + ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FMINNUM %2, %3 + %4:_(s16) = G_FMAXNUM %2, %3 %5:_(s32) = G_ANYEXT %4 $vgpr0 = COPY %5 ... --- -name: test_fminnum_v2s32 +name: test_fmaxnum_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_v2s32 + ; SI-LABEL: name: test_fmaxnum_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 
x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; VI-LABEL: name: test_fminnum_v2s32 + ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-LABEL: name: test_fminnum_v2s32 + ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FMINNUM %0, %1 + %2:_(<2 x s32>) = G_FMAXNUM %0, %1 $vgpr0_vgpr1 = COPY %2 ... --- -name: test_fminnum_v2s16 +name: test_fmaxnum_v2s16 body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_v2s16 + ; SI-LABEL: name: test_fmaxnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -335,19 +335,19 @@ body: | ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; VI-LABEL: name: test_fminnum_v2s16 + ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -361,36 +361,36 @@ body: | ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; GFX9-LABEL: name: 
test_fminnum_v2s16 + ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FMINNUM %0, %1 + %2:_(<2 x s16>) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... --- -name: test_fminnum_v3s16 +name: test_fmaxnum_v3s16 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_v3s16 + ; SI-LABEL: name: test_fmaxnum_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 @@ -417,16 +417,16 @@ body: | ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], 
[[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) @@ -441,7 +441,7 @@ body: | ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) - ; VI-LABEL: name: test_fminnum_v3s16 + ; VI-LABEL: name: test_fmaxnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 @@ -468,19 +468,19 @@ body: | ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE 
[[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] @@ -489,7 +489,7 @@ body: | ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) - ; GFX9-LABEL: name: test_fminnum_v3s16 + ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 @@ -501,11 +501,11 @@ body: | ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], 
[[FCANONICALIZE1]] ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) @@ -513,19 +513,19 @@ body: | %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 %3:_(<3 x s16>) = G_EXTRACT %1, 0 - %4:_(<3 x s16>) = G_FMINNUM %2, %3 + %4:_(<3 x s16>) = G_FMAXNUM %2, %3 %5:_(<4 x s16>) = G_IMPLICIT_DEF %6:_(<4 x s16>) = G_INSERT %5, %4, 0 $vgpr0_vgpr1 = COPY %6 ... 
--- -name: test_fminnum_v4s16 +name: test_fmaxnum_v4s16 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_v4s16 + ; SI-LABEL: name: test_fmaxnum_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -549,20 +549,20 @@ body: | ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FMINNUM_IEEE3]](s32) + ; SI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) @@ -575,7 +575,7 @@ body: | ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; VI-LABEL: name: test_fminnum_v4s16 + ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -599,43 +599,43 @@ body: | ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] ; VI: 
[[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-LABEL: name: test_fminnum_v4s16 + ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FMINNUM %0, %1 + %2:_(<4 x s16>) = G_FMAXNUM %0, %1 $vgpr0_vgpr1 = COPY %2 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index a1499050f831c..63db6ec0d0b34 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -561,3 +561,93 @@ body: | # %5:_(s64) = G_ANYEXT %4 # $vgpr0_vgpr1 = COPY %5 # ... 
+ +--- +name: test_mul_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_mul_s96 + ; GFX6: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX6: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-LABEL: name: test_mul_s96 + ; GFX8: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX8: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX8: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-LABEL: name: test_mul_s96 + ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], 
[[UV3]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_MUL %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir new file mode 100644 index 0000000000000..8b3fbdaa73eba --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -0,0 +1,528 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: saddsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SADDSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: saddsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC 
[[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: saddsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: saddsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SADDSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: saddsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: saddsat_s8 + ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_SADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: saddsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], 
[[C]](s16) + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SADDSAT1]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SADDSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: saddsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SADDSAT]](s8), [[TRUNC2]](s8) + ; 
GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: saddsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SADDSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_SADDSAT %4, %5 + %7:_(s16) 
= G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: saddsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: saddsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: saddsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: saddsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8-LABEL: name: saddsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-LABEL: name: saddsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY 
$vgpr1 + %2:_(<2 x s16>) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: saddsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: saddsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV5]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV6]] + ; GFX6: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16) + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: saddsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV5]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV6]] + ; GFX8: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16) + 
; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: saddsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV5]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV6]] + ; GFX9: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_SADDSAT %1, %2 + %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... 
+ +--- +name: saddsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: saddsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV4]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV5]] + ; GFX6: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV6]] + ; GFX6: [[SADDSAT3:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16), [[SADDSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX8-LABEL: name: saddsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV4]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV5]] + ; GFX8: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV6]] + ; GFX8: [[SADDSAT3:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16), [[SADDSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: saddsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + 
; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV4]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV5]] + ; GFX9: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV6]] + ; GFX9: [[SADDSAT3:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16), [[SADDSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: saddsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[SADDSAT]](s32) + ; GFX8-LABEL: name: saddsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SADDSAT]](s32) + ; GFX9-LABEL: name: saddsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SADDSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: saddsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: saddsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: saddsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: saddsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x 
s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: saddsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: saddsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SADDSAT]](s64) + ; GFX8-LABEL: name: saddsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SADDSAT]](s64) + ; GFX9-LABEL: name: saddsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SADDSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: saddsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: saddsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s64) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SADDSAT]](s64), [[SADDSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: saddsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s64) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SADDSAT]](s64), [[SADDSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: saddsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s64) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SADDSAT]](s64), [[SADDSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir index 276029cd1fdd3..53a90c318be8f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -535,3 +535,262 @@ body: | %2:_(s32) = G_SITOFP %1 $vgpr0 = COPY %2 ... + +--- +name: test_sitofp_s64_to_s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: test_sitofp_s64_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT3]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_sitofp_s64_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), 
[[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: 
[[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]] + ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT3]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_SITOFP %0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_sitofp_v2s64_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; GFX6-LABEL: name: test_sitofp_v2s64_to_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV2]], [[UV4]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV5]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], 
[[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT3]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO3]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: 
[[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR1]](s64) + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF1]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR1]](s64), [[C2]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[SUB1]], [[C1]] + ; GFX6: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[XOR1]], [[CTLZ_ZERO_UNDEF1]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL2]], [[C4]] + ; GFX6: [[AND4:%[0-9]+]]:_(s64) = G_AND [[AND3]], [[C5]] + ; GFX6: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND3]], [[C6]](s32) + ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SELECT4]], [[C7]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[TRUNC1]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND4]](s64), [[C8]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s64), [[C8]] + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[AND5]], [[C1]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[C9]], [[SELECT5]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[OR1]], [[SELECT6]] + ; GFX6: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[ADD1]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR1]](s64), [[C2]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[FNEG1]], [[ADD1]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT7]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8-LABEL: name: test_sitofp_v2s64_to_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV2]], [[UV4]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV5]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), 
[[C8]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]] + ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT3]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR1]](s64) + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF1]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR1]](s64), [[C2]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[SUB1]], [[C1]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[XOR1]], [[CTLZ_ZERO_UNDEF1]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL2]], [[C4]] + ; GFX8: [[AND4:%[0-9]+]]:_(s64) = G_AND [[AND3]], [[C5]] + ; GFX8: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND3]], [[C6]](s32) + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SELECT4]], [[C7]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) + ; GFX8: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[TRUNC1]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND4]](s64), [[C8]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s64), [[C8]] + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[AND5]], [[C1]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[C9]], [[SELECT5]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[OR1]], [[SELECT6]] + ; GFX8: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[ADD1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR1]](s64), [[C2]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[FNEG1]], [[ADD1]] + ; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT7]](s32) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s16>) = G_SITOFP %0 + $vgpr0 = COPY %1 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir new file mode 100644 index 0000000000000..31f119c13e5e1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -0,0 +1,528 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: ssubsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SSUBSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: ssubsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC 
[[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: ssubsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SSUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: ssubsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SSUBSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: ssubsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: ssubsat_s8 + ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_SSUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: ssubsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], 
[[C]](s16) + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SSUBSAT1]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSUBSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: ssubsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSUBSAT]](s8), [[TRUNC2]](s8) + ; 
GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: ssubsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSUBSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_SSUBSAT %4, %5 + %7:_(s16) 
= G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: ssubsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: ssubsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: ssubsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SSUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: ssubsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8-LABEL: name: ssubsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-LABEL: name: ssubsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY 
$vgpr1 + %2:_(<2 x s16>) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: ssubsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: ssubsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV5]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV6]] + ; GFX6: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16) + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: ssubsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV5]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV6]] + ; GFX8: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16) + 
; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: ssubsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV5]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV6]] + ; GFX9: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_SSUBSAT %1, %2 + %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... 
+ +--- +name: ssubsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ssubsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV4]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV5]] + ; GFX6: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV6]] + ; GFX6: [[SSUBSAT3:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16), [[SSUBSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX8-LABEL: name: ssubsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV4]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV5]] + ; GFX8: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV6]] + ; GFX8: [[SSUBSAT3:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16), [[SSUBSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: ssubsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + 
; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV4]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV5]] + ; GFX9: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV6]] + ; GFX9: [[SSUBSAT3:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16), [[SSUBSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ssubsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[SSUBSAT]](s32) + ; GFX8-LABEL: name: ssubsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SSUBSAT]](s32) + ; GFX9-LABEL: name: ssubsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SSUBSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: ssubsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ssubsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: ssubsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: ssubsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x 
s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ssubsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ssubsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SSUBSAT]](s64) + ; GFX8-LABEL: name: ssubsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SSUBSAT]](s64) + ; GFX9-LABEL: name: ssubsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SSUBSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: ssubsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: ssubsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSUBSAT]](s64), [[SSUBSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: ssubsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSUBSAT]](s64), [[SSUBSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: ssubsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSUBSAT]](s64), [[SSUBSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir index 8cb346a761882..3fb34a4edc923 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -501,3 +501,45 @@ body: | # %5:_(s64) = G_ANYEXT %4 # $vgpr0_vgpr1 = COPY %5 # ... + +--- +name: test_sub_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_sub_s96 + ; GFX6: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX6: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-LABEL: name: test_sub_s96 + ; GFX8: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX8: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY1]](s96) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX8: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-LABEL: name: test_sub_s96 + ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_SUB %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir new file mode 100644 index 0000000000000..e080bde81b3a5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -0,0 +1,528 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: uaddsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: uaddsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC 
[[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: uaddsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_UADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: uaddsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: uaddsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: uaddsat_s8 + ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_UADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: uaddsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], 
[[C]](s16) + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[UADDSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: uaddsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[UADDSAT]](s8), [[TRUNC2]](s8) + ; 
GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: uaddsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[UADDSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_UADDSAT %4, %5 + %7:_(s16) 
= G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: uaddsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: uaddsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: uaddsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_UADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: uaddsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8-LABEL: name: uaddsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-LABEL: name: uaddsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY 
$vgpr1 + %2:_(<2 x s16>) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: uaddsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: uaddsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV5]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV6]] + ; GFX6: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16) + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: uaddsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV5]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV6]] + ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16) + 
; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: uaddsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV5]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV6]] + ; GFX9: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_UADDSAT %1, %2 + %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... 
+ +--- +name: uaddsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: uaddsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV4]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV5]] + ; GFX6: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV6]] + ; GFX6: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16), [[UADDSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX8-LABEL: name: uaddsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV4]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV5]] + ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV6]] + ; GFX8: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16), [[UADDSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: uaddsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + 
; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV4]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV5]] + ; GFX9: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV6]] + ; GFX9: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16), [[UADDSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: uaddsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX8-LABEL: name: uaddsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX9-LABEL: name: uaddsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[UADDSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: uaddsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: uaddsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: uaddsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: uaddsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x 
s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: uaddsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: uaddsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[UADDSAT]](s64) + ; GFX8-LABEL: name: uaddsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[UADDSAT]](s64) + ; GFX9-LABEL: name: uaddsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[UADDSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: uaddsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: uaddsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s64) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UADDSAT]](s64), [[UADDSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: uaddsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s64) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UADDSAT]](s64), [[UADDSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: uaddsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s64) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UADDSAT]](s64), [[UADDSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir index a0cd0d2cc0d0b..214900c18ed96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir @@ -479,3 +479,198 @@ body: | %2:_(s32) = G_UITOFP %1 $vgpr0 = COPY %2 ... + +--- +name: test_uitofp_s64_to_s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: test_uitofp_s64_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX6: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C5]](s32) + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[SHL1]], [[TRUNC]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_uitofp_s64_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C5]](s32) + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_UITOFP %0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_sitofp_v2s64_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; GFX6-LABEL: name: test_sitofp_v2s64_to_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX6: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C5]](s32) + ; GFX6: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD]](s32) + ; GFX6: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64) + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF1]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[C1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB1]], [[C]] + ; GFX6: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[CTLZ_ZERO_UNDEF1]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL2]], [[C3]] + ; GFX6: [[AND4:%[0-9]+]]:_(s64) = G_AND [[AND3]], [[C4]] + ; GFX6: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND3]], [[C5]](s32) + ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SELECT3]], [[C6]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[TRUNC1]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND4]](s64), [[C7]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s64), [[C7]] + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[AND5]], [[C]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C8]], [[SELECT4]] + ; GFX6: 
[[ADD1:%[0-9]+]]:_(s32) = G_ADD [[OR1]], [[SELECT5]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD1]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8-LABEL: name: test_sitofp_v2s64_to_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C5]](s32) + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), 
[[AND1]](s64), [[C7]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD]](s32) + ; GFX8: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64) + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF1]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[C1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB1]], [[C]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[CTLZ_ZERO_UNDEF1]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL2]], [[C3]] + ; GFX8: [[AND4:%[0-9]+]]:_(s64) = G_AND [[AND3]], [[C4]] + ; GFX8: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND3]], [[C5]](s32) + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SELECT3]], [[C6]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[TRUNC1]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND4]](s64), [[C7]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s64), [[C7]] + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[AND5]], [[C]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C8]], [[SELECT4]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[OR1]], [[SELECT5]] + ; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD1]](s32) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; 
GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s16>) = G_UITOFP %0 + $vgpr0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir new file mode 100644 index 0000000000000..56a1f1baded55 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -0,0 +1,528 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: usubsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; 
GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: usubsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: usubsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_USUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: usubsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: usubsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: usubsat_s8 + ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_USUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: usubsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], 
[[C]](s16) + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USUBSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: usubsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USUBSAT]](s8), [[TRUNC2]](s8) + ; 
GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) + ; GFX9-LABEL: name: usubsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USUBSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_USUBSAT %4, %5 + %7:_(s16) 
= G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: usubsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: usubsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: usubsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_USUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: usubsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8-LABEL: name: usubsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-LABEL: name: usubsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY 
$vgpr1 + %2:_(<2 x s16>) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: usubsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: usubsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV5]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV6]] + ; GFX6: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16) + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: usubsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV5]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV6]] + ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16) + 
; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: usubsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV5]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV6]] + ; GFX9: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_USUBSAT %1, %2 + %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... 
+ +--- +name: usubsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: usubsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV4]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV5]] + ; GFX6: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV6]] + ; GFX6: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16), [[USUBSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX8-LABEL: name: usubsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV4]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV5]] + ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV6]] + ; GFX8: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16), [[USUBSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: usubsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + 
; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV4]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV5]] + ; GFX9: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV6]] + ; GFX9: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16), [[USUBSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: usubsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX8-LABEL: name: usubsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX9-LABEL: name: usubsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[USUBSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: usubsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: usubsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: usubsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: usubsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x 
s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: usubsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: usubsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[USUBSAT]](s64) + ; GFX8-LABEL: name: usubsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[USUBSAT]](s64) + ; GFX9-LABEL: name: usubsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[USUBSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: usubsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: usubsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s64) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USUBSAT]](s64), [[USUBSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: usubsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s64) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USUBSAT]](s64), [[USUBSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: usubsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s64) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USUBSAT]](s64), [[USUBSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll index de462105dc481..74c6fe270b794 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -1555,40 +1555,40 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0x4 -; CI-NEXT: v_mov_b32_e32 v2, 42 +; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_mov_b32_e32 v0, s2 -; CI-NEXT: v_mov_b32_e32 v3, s4 -; CI-NEXT: ds_inc_rtn_u32 v4, v3, v2 -; CI-NEXT: ds_inc_rtn_u32 v5, v3, v2 -; CI-NEXT: v_mov_b32_e32 v3, s1 -; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: v_mov_b32_e32 v1, s3 +; CI-NEXT: v_mov_b32_e32 v2, s2 +; CI-NEXT: v_mov_b32_e32 v1, s4 +; CI-NEXT: ds_inc_rtn_u32 v4, v1, v0 +; CI-NEXT: ds_inc_rtn_u32 v5, v1, v0 +; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: v_mov_b32_e32 v1, s1 +; CI-NEXT: v_mov_b32_e32 v3, s3 ; CI-NEXT: s_waitcnt lgkmcnt(1) -; CI-NEXT: flat_store_dword v[2:3], v4 +; CI-NEXT: flat_store_dword v[0:1], v4 ; CI-NEXT: s_waitcnt lgkmcnt(1) -; CI-NEXT: flat_store_dword v[0:1], v5 +; CI-NEXT: flat_store_dword v[2:3], v5 ; CI-NEXT: s_endpgm ; ; VI-LABEL: nocse_lds_atomic_inc_ret_i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; VI-NEXT: s_load_dword s4, s[4:5], 0x10 -; VI-NEXT: v_mov_b32_e32 v2, 42 +; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: 
v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s4 -; VI-NEXT: ds_inc_rtn_u32 v4, v3, v2 -; VI-NEXT: ds_inc_rtn_u32 v5, v3, v2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: ds_inc_rtn_u32 v4, v1, v0 +; VI-NEXT: ds_inc_rtn_u32 v5, v1, v0 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_mov_b32_e32 v3, s3 ; VI-NEXT: s_waitcnt lgkmcnt(1) -; VI-NEXT: flat_store_dword v[2:3], v4 +; VI-NEXT: flat_store_dword v[0:1], v4 ; VI-NEXT: s_waitcnt lgkmcnt(1) -; VI-NEXT: flat_store_dword v[0:1], v5 +; VI-NEXT: flat_store_dword v[2:3], v5 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: nocse_lds_atomic_inc_ret_i32: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll new file mode 100644 index 0000000000000..6627804bdf76a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel -verify-machineinstrs < %s | FileCheck %s + +declare i32 @llvm.amdgcn.ballot.i32(i1) + +; Test ballot(0) + +define amdgpu_cs i32 @constant_false() { +; CHECK-LABEL: constant_false: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: ; implicit-def: $vcc_hi +; CHECK-NEXT: ; return to shader part epilog + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0) + ret i32 %ballot +} + +; Test ballot(1) + +define amdgpu_cs i32 @constant_true() { +; CHECK-LABEL: constant_true: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s0, exec_lo +; CHECK-NEXT: ; implicit-def: $vcc_hi +; CHECK-NEXT: ; return to shader part epilog + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1) + ret i32 %ballot +} + +; Test ballot of a non-comparison operation + +define amdgpu_cs i32 
@non_compare(i32 %x) { +; CHECK-LABEL: non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: ; implicit-def: $vcc_hi +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; CHECK-NEXT: ; return to shader part epilog + %trunc = trunc i32 %x to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %trunc) + ret i32 %ballot +} + +; Test ballot of comparisons + +define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) { +; CHECK-LABEL: compare_ints: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_eq_u32_e64 s0, v0, v1 +; CHECK-NEXT: ; implicit-def: $vcc_hi +; CHECK-NEXT: ; return to shader part epilog + %cmp = icmp eq i32 %x, %y + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) + ret i32 %ballot +} + +define amdgpu_cs i32 @compare_int_with_constant(i32 %x) { +; CHECK-LABEL: compare_int_with_constant: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_le_i32_e64 s0, 0x63, v0 +; CHECK-NEXT: ; implicit-def: $vcc_hi +; CHECK-NEXT: ; return to shader part epilog + %cmp = icmp sge i32 %x, 99 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) + ret i32 %ballot +} + +define amdgpu_cs i32 @compare_floats(float %x, float %y) { +; CHECK-LABEL: compare_floats: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_f32_e64 s0, v0, v1 +; CHECK-NEXT: ; implicit-def: $vcc_hi +; CHECK-NEXT: ; return to shader part epilog + %cmp = fcmp ogt float %x, %y + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) + ret i32 %ballot +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll new file mode 100644 index 0000000000000..5f5af2954ff56 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -verify-machineinstrs < %s | FileCheck %s + +declare i64 @llvm.amdgcn.ballot.i64(i1) + +; Test ballot(0) + +define amdgpu_cs i64 @constant_false() { +; 
CHECK-LABEL: constant_false: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ; return to shader part epilog + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0) + ret i64 %ballot +} + +; Test ballot(1) + +define amdgpu_cs i64 @constant_true() { +; CHECK-LABEL: constant_true: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s0, exec_lo +; CHECK-NEXT: s_mov_b32 s1, exec_hi +; CHECK-NEXT: ; return to shader part epilog + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1) + ret i64 %ballot +} + +; Test ballot of a non-comparison operation + +define amdgpu_cs i64 @non_compare(i32 %x) { +; CHECK-LABEL: non_compare: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v0 +; CHECK-NEXT: ; return to shader part epilog + %trunc = trunc i32 %x to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc) + ret i64 %ballot +} + +; Test ballot of comparisons + +define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) { +; CHECK-LABEL: compare_ints: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog + %cmp = icmp eq i32 %x, %y + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) + ret i64 %ballot +} + +define amdgpu_cs i64 @compare_int_with_constant(i32 %x) { +; CHECK-LABEL: compare_int_with_constant: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v1, 0x63 +; CHECK-NEXT: v_cmp_ge_i32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog + %cmp = icmp sge i32 %x, 99 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) + ret i64 %ballot +} + +define amdgpu_cs i64 @compare_floats(float %x, float %y) { +; CHECK-LABEL: compare_floats: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog + %cmp = fcmp ogt float %x, %y + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) + ret i64 %ballot +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 2a3034763087d..80f86c6b1f50a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -235,17 +235,17 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; ; GFX8-LABEL: test_div_fmas_f32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s2, 1, s2 -; GFX8-NEXT: v_mov_b32_e32 v0, s3 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, s5 -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dword s5, s[0:1], 0xb8 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: s_and_b32 s2, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX8-NEXT: s_nop 3 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 @@ -527,43 +527,43 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x11 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s6 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: v_mov_b32_e32 v4, s10 -; GFX7-NEXT: s_and_b32 s0, 1, s0 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mov_b32_e32 v3, s9 -; GFX7-NEXT: 
v_mov_b32_e32 v5, s11 -; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX7-NEXT: s_nop 3 -; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX7-NEXT: v_mov_b32_e32 v2, s4 -; GFX7-NEXT: v_mov_b32_e32 v3, s5 -; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX7-NEXT: s_endpgm +; GFX7-NEXT: s_load_dword s8, s[0:1], 0x11 +; GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: v_mov_b32_e32 v2, s4 +; GFX7-NEXT: v_mov_b32_e32 v4, s6 +; GFX7-NEXT: s_and_b32 s2, 1, s8 +; GFX7-NEXT: v_mov_b32_e32 v3, s5 +; GFX7-NEXT: v_mov_b32_e32 v5, s7 +; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 +; GFX7-NEXT: s_nop 3 +; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX7-NEXT: v_mov_b32_e32 v3, s1 +; GFX7-NEXT: v_mov_b32_e32 v2, s0 +; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f64: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 -; GFX8-NEXT: s_load_dword s0, s[0:1], 0x44 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 -; GFX8-NEXT: v_mov_b32_e32 v4, s10 -; GFX8-NEXT: s_and_b32 s0, 1, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s7 -; GFX8-NEXT: v_mov_b32_e32 v3, s9 -; GFX8-NEXT: v_mov_b32_e32 v5, s11 -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_nop 3 -; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 -; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8-NEXT: s_endpgm +; GFX8-NEXT: s_load_dword s8, s[0:1], 0x44 +; GFX8-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v4, s6 +; GFX8-NEXT: s_and_b32 s2, 1, s8 +; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_mov_b32_e32 v5, s7 +; 
GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 +; GFX8-NEXT: s_nop 3 +; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_endpgm ; ; GFX10_W32-LABEL: test_div_fmas_f64: ; GFX10_W32: ; %bb.0: @@ -848,17 +848,17 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7] ; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[0:3], 0 addr64 ; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[0:3], 0 addr64 offset:4 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7-NEXT: buffer_load_dword v0, v[1:2], s[0:3], 0 addr64 offset:8 +; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 offset:8 ; GFX7-NEXT: s_cmp_lg_u32 s8, 0 -; GFX7-NEXT: s_cselect_b32 s6, 1, 0 -; GFX7-NEXT: s_and_b32 s0, 1, s6 +; GFX7-NEXT: s_cselect_b32 s0, 1, 0 +; GFX7-NEXT: s_and_b32 s0, 1, s0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_and_b64 vcc, vcc, s[0:1] ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v0 +; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 ; GFX7-NEXT: s_endpgm ; @@ -1075,7 +1075,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W32-NEXT: s_cselect_b32 s4, 1, 0 ; GFX10_W32-NEXT: BB13_2: ; %exit -; GFX10_W32-NEXT: v_nop +; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 @@ -1113,7 +1113,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W64-NEXT: s_cselect_b32 s6, 1, 0 ; GFX10_W64-NEXT: BB13_2: ; %exit -; GFX10_W64-NEXT: 
v_nop +; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_W64-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX10_W64-NEXT: s_and_b32 s0, 1, s6 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll index 8cba08f016daf..4193d976afd65 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll @@ -1,5 +1,4 @@ -; FIXME: Broken SI run line -; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll index 28c2c7a4e9bfb..e2c3b625395a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll @@ -1,5 +1,4 @@ -; FIXME: Broken SI run line -; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck 
-check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index 28c7d47e855f7..e38df28d23d36 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: BB0_2: ; %bb -; GCN-NEXT: v_nop +; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll index b8341e1183da1..9c71580ac3c4a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -612,7 +612,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7" + 4096, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -780,7 +780,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %13:vgpr_32, dead %35:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; CHECK: %13:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index b5372ef76a7a5..86f177422ae4f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -380,7 +380,7 @@ define 
amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED: %11:vgpr_32, dead %24:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; UNPACKED: %11:vgpr_32, dead %24:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec @@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 @@ -426,7 +426,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED: %13:vgpr_32, dead %49:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED: %13:vgpr_32, dead %49:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec @@ -473,7 +473,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; PACKED: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; PACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index 94360c0d18689..a9f39605270fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -243,7 +243,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = 
V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 @@ -272,7 +272,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_I32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK: %15:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec ; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 4473d64dfa2a4..c2240dd355ea5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -575,7 +575,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = 
V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 @@ -671,7 +671,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 @@ -695,7 +695,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 ; CHECK: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 9c44181a888e5..66425c27a19fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -4347,7 +4347,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -4363,7 +4363,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load 4) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -4379,7 +4379,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -4402,7 +4402,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -4418,7 +4418,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -4434,7 +4434,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] @@ -4511,7 +4511,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], 
[[COPY6]], 0, implicit $exec + ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -4527,7 +4527,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -4543,7 +4543,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load 4) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index 5389adf5a526e..7d116f8e8925f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -687,14 +687,48 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ret void } -; FIXME -; define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { -; %src = load i32, i32 addrspace(1)* %in, align 4 -; %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) -; %div = sdiv i32 %bfe, 2 -; store i32 %div, i32 addrspace(1)* %out, align 4 -; ret void -; } +define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +; GFX6-LABEL: simplify_demanded_bfe_sdiv: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: v_mul_lo_u32 v1, -2, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_bfe_i32 s2, s2, 0x100001 +; GFX6-NEXT: s_ashr_i32 s3, s2, 31 +; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX6-NEXT: s_add_i32 s2, s2, s3 +; GFX6-NEXT: s_xor_b32 s2, s2, s3 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, 
vcc +; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s3, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: s_endpgm + %src = load i32, i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) + %div = sdiv i32 %bfe, 2 + store i32 %div, i32 addrspace(1)* %out, align 4 + ret void +} define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_0_width: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index bac18c1f6ce06..a5737e8233af3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -950,21 +950,21 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: s_load_dword s8, s[2:3], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_and_b32 s0, s0, 63 -; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 
+; GFX6-NEXT: s_and_b32 s8, s8, 63 +; GFX6-NEXT: s_bfe_u32 s9, s8, 0x20002 +; GFX6-NEXT: v_mov_b32_e32 v1, s9 +; GFX6-NEXT: v_mov_b32_e32 v0, s8 ; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 -; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index 4ca0a1aa049fb..af8b4f0f9e5ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -20,53 +20,53 @@ define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 11, v0 ; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[2:3], off offset:-6 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[2:3], off offset:-5 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[2:3], off offset:-4 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[2:3], off offset:-3 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[2:3], off offset:-2 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[2:3], off offset:-1 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[2:3], off offset:-6 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[2:3], off offset:-5 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[2:3], off offset:-4 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[2:3], off offset:-3 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[2:3], off offset:-2 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[2:3], off offset:-1 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[2:3], off ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off ; 
GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v[2:3], off offset:-10 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v13, v[2:3], off offset:-9 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v14, v[2:3], off offset:-8 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[2:3], off offset:-9 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[2:3], off offset:-8 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[2:3], off offset:-7 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, 0xff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xff ; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, 8 ; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s5, 8 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v4, v13, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v7, v7, v4 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v3 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v4 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v3 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v8, v13, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v10, v11, v4 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v3 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v11, v12, v4 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v10, v10, v3 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v13 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v11, s4, v11 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v13, s4, v14 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v12, s4, v12 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, v4, v6 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, v3, v4 ; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 24, v13 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v8 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v4, v9, v4, v5 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v10 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v11 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v3 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v2, v6, v7 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v4, v5, v8 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v11 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v11, 24, v12 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v5 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v6 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v3, v7, v3, v8 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v9 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v10 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v11 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v2, v4, v5 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v3, v6, v7 ; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1: @@ -85,61 +85,62 @@ define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NOUNALIGNED-NEXT: 
buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:5 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:6 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:7 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:8 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:9 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:10 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:11 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64 offset:1 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:2 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v13, v[0:1], s[4:7], 0 addr64 offset:3 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4 -; GFX7-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, 0xff -; GFX7-NOUNALIGNED-NEXT: s_movk_i32 s8, 0xff +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:1 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:3 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:4 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:5 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:6 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:7 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:8 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64 offset:9 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:10 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, 
v[0:1], s[4:7], 0 addr64 offset:11 +; GFX7-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, 0xff +; GFX7-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, v3, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s4, v2 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, v4, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v3 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v5 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v7, v7, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, s4, v6 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v7, v7, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v9, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s8, v10 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v10, s8, v11 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v10, v10, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v11, s8, v12 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 8, v10 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v11, v11, v1 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v12, v12, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s8, v0 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 
v0, v0, v1 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v3 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 8, v7 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 16, v8 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v9 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 8, v11 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v11, 16, v12 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v12, 24, v0 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v2, v1 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v6, v5 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v10, v9 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v12, s8, v13 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v10, 16, v11 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v3, v6, v7 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v3, v3, v8 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v0, v4 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v1, v11 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v5 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v3, v2 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v11 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v12 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1 ret <3 x i32> %load @@ -158,27 +159,27 @@ define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { ; 
GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 10, v0 ; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[2:3], off ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v[2:3], off offset:-8 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[2:3], off offset:-6 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v7, v[2:3], off offset:-4 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[2:3], off offset:-6 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[2:3], off offset:-4 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[2:3], off offset:-2 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xffff ; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v4 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v4, v4, v3 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) ; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v1 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v3, v7, v4 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v3 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v6, v4, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v5, v3, v6 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, v4, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, v3, v4 ; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: 
v_load_constant_v3i32_align2: @@ -203,18 +204,18 @@ define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:4 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:6 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:8 -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s8, 0xffff +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s8, v3 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v3 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s8, v4 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v4 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s8, v5 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v5 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s8, v6 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v6 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, s8, v0 -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s8, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, s4, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s4, v2 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v5 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v0 @@ -432,58 +433,58 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v16, s2 ; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 9 ; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v20, v[10:11], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[12:13], off -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s2 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v19, s3 +; 
GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v18, s2 ; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 10 ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NOUNALIGNED-NEXT: s_add_u32 s0, s0, 11 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v13, v[14:15], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v14, v[16:17], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v15, v[10:11], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v20, v[10:11], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v21, v[12:13], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v14, v[14:15], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v15, v[16:17], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v16, v[18:19], off +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s1, s1, 0 ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, s1 ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s2 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v16, v[10:11], off -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s1 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v12, s0 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[10:11], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[12:13], off ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v[2:3], off ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[4:5], off ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v[6:7], off ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[8:9], off -; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s5, 8 -; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v18, 0xff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v19, 8 +; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s1, 8 +; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s0, 0xff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, 0xff +; GFX9-NOUNALIGNED-NEXT: 
v_mov_b32_e32 v6, 8 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s0, v1 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v2 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v2 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s4, v3 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s0, v3 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v2 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v12, v18 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v13, v18 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v21, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v14, v5 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v19, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v6, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v4, v18, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v4, v5, v0 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v0, v1, v2 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v19, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v10, v18 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v6, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; 
GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v11, v5 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v16, v18 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v14, v18, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v10, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v15, v5, v0 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v0, v1, v2 @@ -508,60 +509,59 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* ; GFX7-NOUNALIGNED: ; %bb.0: ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:5 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:6 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 offset:7 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, off, s[0:3], 0 offset:8 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, off, s[0:3], 0 offset:9 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, off, s[0:3], 0 offset:10 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, off, s[0:3], 0 offset:11 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, off, s[0:3], 0 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, off, s[0:3], 0 offset:1 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, off, s[0:3], 0 offset:2 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, off, s[0:3], 0 offset:3 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v12, off, s[0:3], 0 offset:4 -; GFX7-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0xff -; GFX7-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:1 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:2 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 offset:3 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, off, s[0:3], 0 
offset:4 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, off, s[0:3], 0 offset:5 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, off, s[0:3], 0 offset:6 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, off, s[0:3], 0 offset:7 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, off, s[0:3], 0 offset:8 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, off, s[0:3], 0 offset:9 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, off, s[0:3], 0 offset:10 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, off, s[0:3], 0 offset:11 +; GFX7-NOUNALIGNED-NEXT: v_mov_b32_e32 v12, 0xff +; GFX7-NOUNALIGNED-NEXT: s_movk_i32 s0, 0xff ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v1, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s0, v0 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v2, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, v3, v0 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, v4, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s0, v2 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v12 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v12 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v8, s4, v8 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v9, s4, v9 -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v12 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; 
GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v10, s4, v10 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 8, v9 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v12 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v11, s4, v11 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v10, v10, v12 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v7, v7, v12 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v12, s4, v12 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v10 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v10, 24, v11 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v11, 24, v0 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v8, v7 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v11, v11, v12 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v12, v1 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v5 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v4, v5 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v4, v6 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v8, v9 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v10 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v6 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v4, v10 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v7 ; GFX7-NOUNALIGNED-NEXT: 
v_or_b32_e32 v2, v2, v11 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 @@ -613,21 +613,21 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[4:5], off ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v[6:7], off ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[8:9], off -; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v12, 0xffff +; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s0, v1 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v3, v12 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v3, v5 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v2, v12, v0 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v10, v12 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v2, v5, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v10, v5 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v4, v12, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v4, v5, v0 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 ; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog @@ -656,19 +656,19 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:4 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:6 ; 
GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s0, 0xffff ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s0, v0 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s0, v2 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s0, v3 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v5 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s0, v5 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v0 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index d2e06fb449891..06bf7f7949309 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -96,8 +96,8 @@ define amdgpu_kernel void @localize_globals(i1 %cond) { ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, gv3@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, gv3@gotpcrel32@hi+4 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -116,10 +116,10 @@ define amdgpu_kernel void @localize_globals(i1 %cond) { ; 
GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+4 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_getpc_b64 s[2:3] ; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+4 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir new file mode 100644 index 0000000000000..83067f1e1c866 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-freeze.mir @@ -0,0 +1,559 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=regbankselect %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -o - | FileCheck %s + +--- +name: test_freeze_s1_vgpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_s1_vgpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s1) = G_FREEZE [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FREEZE]](s1) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(s1) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2(s1) + $vgpr0 = COPY %3(s32) + +... 
+ +--- +name: test_freeze_s1_vgpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_s1_vgpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s1) = G_FREEZE [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FREEZE]](s1) + ; CHECK: $agpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(s1) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2(s1) + $agpr0 = COPY %3(s32) + +... + +--- +name: test_freeze_s1_vcc +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: test_freeze_s1_vcc + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; CHECK: [[FREEZE:%[0-9]+]]:vcc(s1) = G_FREEZE [[ICMP]] + ; CHECK: S_ENDPGM 0, implicit [[FREEZE]](s1) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(eq), %0(s32), %1 + %3:_(s1) = G_FREEZE %2 + S_ENDPGM 0, implicit %3 + +... + +--- +name: test_freeze_s16_vgpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_s16_vgpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s16) = G_FREEZE [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FREEZE]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0(s32) + %2:_(s16) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2(s16) + $vgpr0 = COPY %3(s32) + +... 
+ +--- +name: test_freeze_s32_vgpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_s32_vgpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_FREEZE %0 + $vgpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_sgpr_to_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: test_freeze_s32_sgpr_to_sgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:sgpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $sgpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_FREEZE %0 + $sgpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_sgpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: test_freeze_s32_sgpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:sgpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_FREEZE %0 + $vgpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_vgpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_s32_vgpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $agpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_FREEZE %0 + $agpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_sgpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: test_freeze_s32_sgpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:sgpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $agpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_FREEZE %0 + $agpr0 = COPY %1(s32) + +... 
+ +--- +name: test_freeze_s32_agpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $agpr0 + ; CHECK-LABEL: name: test_freeze_s32_agpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:agpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $agpr0 + %1:_(s32) = G_FREEZE %0 + $vgpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s32_agpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $agpr0 + ; CHECK-LABEL: name: test_freeze_s32_agpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:agpr(s32) = G_FREEZE [[COPY]] + ; CHECK: $agpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $agpr0 + %1:_(s32) = G_FREEZE %0 + $agpr0 = COPY %1(s32) + +... + +--- +name: test_freeze_s64 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_s64 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s64) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(s64) +... + +--- +name: test_freeze_s128 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_freeze_s128 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s128) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](s128) + %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(s128) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(s128) +... 
+ +--- +name: test_freeze_256 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-LABEL: name: test_freeze_256 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s256) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](s256) + %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(s256) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(s256) +... + +--- +name: test_freeze_s512 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-LABEL: name: test_freeze_s512 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(s512) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) + %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s512) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(s512) +... + +--- +name: test_freeze_v2s32 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<2 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<2 x s32>) +... 
+ +--- +name: test_freeze_v3s32 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-LABEL: name: test_freeze_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<3 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x s32>) +... + +--- +name: test_freeze_v4s32 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_freeze_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<4 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x s32>) +... + +--- +name: test_freeze_v5s32 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK-LABEL: name: test_freeze_v5s32 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<5 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x s32>) + %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(<5 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x s32>) +... 
+ +--- +name: test_freeze_v8s32 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-LABEL: name: test_freeze_v8s32 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<8 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x s32>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x s32>) +... + +--- +name: test_freeze_v16s32 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-LABEL: name: test_freeze_v16s32 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<16 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x s32>) +... + +--- +name: test_freeze_v2s16 +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_FREEZE %0 + $vgpr0 = COPY %1(<2 x s16>) +... 
+ +--- +name: test_freeze_v4s16 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<4 x s16>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(<4 x s16>) +... + +--- +name: test_freeze_v6s16 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-LABEL: name: test_freeze_v6s16 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<6 x s16>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x s16>) +... + +--- +name: test_freeze_v8s16 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_freeze_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<8 x s16>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x s16>) + %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<8 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x s16>) +... + +--- +name: test_freeze_v2s64 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_freeze_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(<2 x s64>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x s64>) +... 
+ +--- +name: test_freeze_p0 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_p0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p0) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p0) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(p0) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p0) +... + +--- +name: test_freeze_p1 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_p1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p1) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p1) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p1) +... + +--- +name: test_freeze_p2 +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_p2 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p2) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](p2) + %0:_(p2) = COPY $vgpr0 + %1:_(p2) = G_FREEZE %0 + $vgpr0 = COPY %1(p2) +... + +--- +name: test_freeze_p3 +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_p3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p3) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](p3) + %0:_(p3) = COPY $vgpr0 + %1:_(p3) = G_FREEZE %0 + $vgpr0 = COPY %1(p3) +... + +--- +name: test_freeze_p4 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_p4 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p4) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p4) + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(p4) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p4) +... 
+ +--- +name: test_freeze_p5 +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_freeze_p5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p5) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](p5) + %0:_(p5) = COPY $vgpr0 + %1:_(p5) = G_FREEZE %0 + $vgpr0 = COPY %1(p5) +... + +--- +name: test_freeze_p999 +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_freeze_p999 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p999) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:vgpr(p999) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p999) + %0:_(p999) = COPY $vgpr0_vgpr1 + %1:_(p999) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1(p999) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll index a8631a18de3cd..2512aaaeb082c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -15,14 +15,58 @@ entry: ret i32 %r0.val } -; FIXME: -; define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) { -; entry: -; %xor = xor <2 x i16> %a, %b -; %r0.val = xor <2 x i16> %xor, -; %cast = bitcast <2 x i16> %r0.val to i32 -; ret i32 %cast -; } +; FIXME: fails to match +define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: scalar_xnor_v2i16_one_use: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, s4 +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_lshl_b32 s1, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: s_xor_b32 s0, s0, s1 +; GFX7-NEXT: s_xor_b32 s0, s0, -1 +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: scalar_xnor_v2i16_one_use: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_mov_b32 s2, 0xffff +; GFX8-NEXT: s_lshr_b32 s5, s0, 16 +; GFX8-NEXT: s_lshr_b32 s6, s1, 
16 +; GFX8-NEXT: s_and_b32 s4, s0, s2 +; GFX8-NEXT: s_and_b32 s0, s1, s2 +; GFX8-NEXT: s_and_b32 s5, s5, s2 +; GFX8-NEXT: s_and_b32 s1, s6, s2 +; GFX8-NEXT: s_mov_b32 s3, s2 +; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_and_b32 s0, s0, s2 +; GFX8-NEXT: s_or_b32 s0, s1, s0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX900-LABEL: scalar_xnor_v2i16_one_use: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_pack_ll_b32_b16 s2, -1, -1 +; GFX900-NEXT: s_xor_b32 s0, s0, s1 +; GFX900-NEXT: s_xor_b32 s0, s0, s2 +; GFX900-NEXT: ; return to shader part epilog +; +; GFX906-LABEL: scalar_xnor_v2i16_one_use: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: s_pack_ll_b32_b16 s2, -1, -1 +; GFX906-NEXT: s_xor_b32 s0, s0, s1 +; GFX906-NEXT: s_xor_b32 s0, s0, s2 +; GFX906-NEXT: ; return to shader part epilog +entry: + %xor = xor <2 x i16> %a, %b + %r0.val = xor <2 x i16> %xor, + %cast = bitcast <2 x i16> %r0.val to i32 + ret i32 %cast +} define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) { ; GCN-LABEL: scalar_xnor_i32_mul_use: @@ -51,13 +95,79 @@ define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) { ret i64 %r0.val } -; FIXME: -; define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) { -; %xor = xor <4 x i16> %a, %b -; %ret = xor <4 x i16> %xor, -; %cast = bitcast <4 x i16> %ret to i64 -; ret i64 %cast -; } +; FIXME: fails to match +define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) { +; GFX7-LABEL: scalar_xnor_v4i16_one_use: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s8, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, s8 +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_lshl_b32 s1, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, s8 +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: s_and_b32 s3, s4, s8 
+; GFX7-NEXT: s_lshl_b32 s2, s5, 16 +; GFX7-NEXT: s_or_b32 s2, s2, s3 +; GFX7-NEXT: s_lshl_b32 s3, s7, 16 +; GFX7-NEXT: s_and_b32 s4, s6, s8 +; GFX7-NEXT: s_or_b32 s3, s3, s4 +; GFX7-NEXT: s_mov_b32 s4, -1 +; GFX7-NEXT: s_mov_b32 s5, s4 +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: scalar_xnor_v4i16_one_use: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: s_lshr_b32 s5, s0, 16 +; GFX8-NEXT: s_and_b32 s7, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s1, 16 +; GFX8-NEXT: s_and_b32 s6, s0, s4 +; GFX8-NEXT: s_and_b32 s0, s1, s4 +; GFX8-NEXT: s_and_b32 s1, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s2, 16 +; GFX8-NEXT: s_and_b32 s8, s2, s4 +; GFX8-NEXT: s_and_b32 s9, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s3, 16 +; GFX8-NEXT: s_and_b32 s2, s3, s4 +; GFX8-NEXT: s_and_b32 s3, s5, s4 +; GFX8-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9] +; GFX8-NEXT: s_mov_b32 s5, s4 +; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_and_b64 s[2:3], s[6:7], s[4:5] +; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] +; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX8-NEXT: s_xor_b64 s[6:7], s[0:1], s[4:5] +; GFX8-NEXT: s_and_b32 s1, s2, s4 +; GFX8-NEXT: s_lshl_b32 s0, s3, 16 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_lshl_b32 s1, s7, 16 +; GFX8-NEXT: s_and_b32 s2, s6, s4 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX900-LABEL: scalar_xnor_v4i16_one_use: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_pack_ll_b32_b16 s4, -1, -1 +; GFX900-NEXT: s_mov_b32 s5, s4 +; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX900-NEXT: ; return to shader part epilog +; +; GFX906-LABEL: scalar_xnor_v4i16_one_use: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_pack_ll_b32_b16 s4, -1, -1 +; GFX906-NEXT: s_mov_b32 s5, s4 +; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 
+; GFX906-NEXT: ; return to shader part epilog + %xor = xor <4 x i16> %a, %b + %ret = xor <4 x i16> %xor, + %cast = bitcast <4 x i16> %ret to i64 + ret i64 %cast +} define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) { ; GCN-LABEL: scalar_xnor_i64_mul_use: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll index 7a7967cca5da9..dc899ed8ba98a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll @@ -162,9 +162,9 @@ define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b64 s[4:5], 0 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX6-NEXT: s_mov_b32 s8, 0 +; GFX6-NEXT: s_mov_b32 s4, 0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] %load = load i32, i32 addrspace(1)* %ptr @@ -180,8 +180,8 @@ define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s5 -; GFX9-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -192,8 +192,8 @@ define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { ; GFX8-NEXT: s_mov_b32 s4, 0 ; GFX8-NEXT: s_mov_b32 s5, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: v_mov_b32_e32 v2, s5 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -204,11 +204,11 @@ define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b64 s[4:5], 0 ; GFX6-NEXT: buffer_load_dword 
v0, v[0:1], s[4:7], 0 addr64 -; GFX6-NEXT: s_mov_b32 s8, 0 ; GFX6-NEXT: s_mov_b32 s4, 0 +; GFX6-NEXT: s_mov_b32 s5, 0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 +; GFX6-NEXT: v_mov_b32_e32 v3, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] %load = load i32, i32 addrspace(1)* %ptr diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir index ca7012607aa1c..667a22ea0c1da 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -3,18 +3,33 @@ --- | define amdgpu_kernel void @a_to_v() #0 { ret void } + define amdgpu_kernel void @a2_to_v2() #0 { ret void } + define amdgpu_kernel void @a3_to_v3() #0 { ret void } define amdgpu_kernel void @a4_to_v4() #0 { ret void } + define amdgpu_kernel void @a8_to_v8() #0 { ret void } define amdgpu_kernel void @a16_to_v16() #0 { ret void } define amdgpu_kernel void @v_to_a() #0 { ret void } + define amdgpu_kernel void @v2_to_a2() #0 { ret void } + define amdgpu_kernel void @v3_to_a3() #0 { ret void } define amdgpu_kernel void @v4_to_a4() #0 { ret void } + define amdgpu_kernel void @v8_to_a8() #0 { ret void } define amdgpu_kernel void @v16_to_a16() #0 { ret void } define amdgpu_kernel void @s_to_a() #0 { ret void } define amdgpu_kernel void @s2_to_a2() #0 { ret void } + define amdgpu_kernel void @s3_to_a3() #0 { ret void } + define amdgpu_kernel void @s4_to_a4() #0 { ret void } + define amdgpu_kernel void @s6_to_a6() #0 { ret void } + define amdgpu_kernel void @s8_to_a8() #0 { ret void } + define amdgpu_kernel void @s16_to_a16() #0 { ret void } define amdgpu_kernel void @a_to_a() #0 { ret void } define amdgpu_kernel void @a2_to_a2() #0 { ret void } + define amdgpu_kernel void @a3_to_a3() #0 { ret void } + define amdgpu_kernel void @a4_to_a4() #0 { ret void } + define amdgpu_kernel void @a8_to_a8() #0 { ret void } + define amdgpu_kernel void @a16_to_a16() #0 { ret 
void } define amdgpu_kernel void @a_to_a_spill() #0 { ret void } attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } @@ -34,6 +49,39 @@ body: | S_ENDPGM 0, implicit $vgpr0 ... +--- +name: a2_to_v2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1 + + ; GCN-LABEL: name: a2_to_v2 + ; GCN: liveins: $agpr0_agpr1 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr0_agpr1 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec + ; GCN: S_ENDPGM 0, implicit $vgpr0_vgpr1 + $vgpr0_vgpr1 = COPY killed $agpr0_agpr1, implicit $exec + S_ENDPGM 0, implicit $vgpr0_vgpr1 +... + +--- +name: a3_to_v3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1_agpr2 + + ; GCN-LABEL: name: a3_to_v3 + ; GCN: liveins: $agpr0_agpr1_agpr2 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $agpr0_agpr1_agpr2 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec + ; GCN: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2 + $vgpr0_vgpr1_vgpr2 = COPY killed $agpr0_agpr1_agpr2, implicit $exec + S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2 +... + --- name: a4_to_v4 tracksRegLiveness: true @@ -51,6 +99,28 @@ body: | S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ... 
+--- +name: a8_to_v8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + + ; GCN-LABEL: name: a8_to_v8 + ; GCN: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr4 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr5 = V_ACCVGPR_READ_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr6 = V_ACCVGPR_READ_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GCN: $vgpr7 = V_ACCVGPR_READ_B32 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec + ; GCN: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec + S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 +... + --- name: a16_to_v16 tracksRegLiveness: true @@ -94,6 +164,37 @@ body: | S_ENDPGM 0, implicit $agpr0 ... 
+--- +name: v2_to_a2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v2_to_a2 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $vgpr0_vgpr1 + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1 + $agpr0_agpr1 = COPY killed $vgpr0_vgpr1, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1 +... + +--- +name: v3_to_a3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; GCN-LABEL: name: v3_to_a3 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $vgpr0_vgpr1_vgpr2 + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 + $agpr0_agpr1_agpr2 = COPY killed $vgpr0_vgpr1_vgpr2, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 +... + --- name: v4_to_a4 tracksRegLiveness: true @@ -111,6 +212,27 @@ body: | S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ... 
+--- +name: v8_to_a8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-LABEL: name: v8_to_a8 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 +... + --- name: v16_to_a16 tracksRegLiveness: true @@ -172,6 +294,145 @@ body: | S_ENDPGM 0, implicit $agpr0_agpr1 ... 
+--- +name: s3_to_a3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2 + ; GCN-LABEL: name: s3_to_a3 + ; GCN: liveins: $sgpr0_sgpr1_sgpr2 + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 + $agpr0_agpr1_agpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 +... + +--- +name: s4_to_a4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-LABEL: name: s4_to_a4 + ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + $agpr0_agpr1_agpr2_agpr3 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +... 
+ +--- +name: s6_to_a6 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GCN-LABEL: name: s6_to_a6 + ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 +... 
+ +--- +name: s8_to_a8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-LABEL: name: s8_to_a8 + ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr6, implicit $exec + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr7, implicit $exec + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 +... 
+ +--- +name: s16_to_a16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-LABEL: name: s16_to_a16 + ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr6, implicit $exec + ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr7, implicit $exec + ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec + ; GCN: $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr9, implicit $exec + ; GCN: $agpr9 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr10, implicit $exec + ; GCN: $agpr10 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr11, implicit $exec + ; GCN: $agpr11 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr12, implicit $exec + ; GCN: $agpr12 = V_ACCVGPR_WRITE_B32 
killed $vgpr0, implicit $exec + ; GCN: $vgpr1 = V_MOV_B32_e32 killed $sgpr13, implicit $exec + ; GCN: $agpr13 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_MOV_B32_e32 killed $sgpr14, implicit $exec + ; GCN: $agpr14 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr0 = V_MOV_B32_e32 killed $sgpr15, implicit $exec + ; GCN: $agpr15 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 +... + --- name: a_to_a tracksRegLiveness: true @@ -204,6 +465,120 @@ body: | S_ENDPGM 0, implicit $agpr1_agpr2 ... +--- +name: a3_to_a3 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: a3_to_a3 + ; GCN: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4 + $agpr0_agpr1_agpr2 = IMPLICIT_DEF + $agpr2_agpr3_agpr4 = COPY killed $agpr0_agpr1_agpr2, implicit $exec + S_ENDPGM 0, implicit $agpr2_agpr3_agpr4 +... 
+ +--- +name: a4_to_a4 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: a4_to_a4 + ; GCN: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec + ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec + ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 + $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + $agpr2_agpr3_agpr4_agpr5 = COPY killed $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 +... + +--- +name: a8_to_a8 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: a8_to_a8 + ; GCN: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr7, implicit $exec + ; GCN: $agpr15 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr6, implicit $exec + ; GCN: $agpr14 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr5, implicit $exec + ; GCN: $agpr13 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec + ; GCN: $agpr12 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec + ; GCN: $agpr11 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec + ; GCN: $agpr10 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; GCN: $agpr9 
= V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + ; GCN: $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec + S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 +... + +--- +name: a16_to_a16 +tracksRegLiveness: true +body: | + bb.0: + + ; GCN-LABEL: name: a16_to_a16 + ; GCN: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr15, implicit $exec + ; GCN: $agpr31 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr14, implicit $exec + ; GCN: $agpr30 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr13, implicit $exec + ; GCN: $agpr29 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr12, implicit $exec + ; GCN: $agpr28 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr11, implicit $exec + ; GCN: $agpr27 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr10, implicit $exec + ; GCN: $agpr26 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr9, implicit $exec + ; GCN: $agpr25 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr8, implicit $exec + ; GCN: $agpr24 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr7, implicit $exec + ; GCN: $agpr23 = 
V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr6, implicit $exec + ; GCN: $agpr22 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr5, implicit $exec + ; GCN: $agpr21 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec + ; GCN: $agpr20 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec + ; GCN: $agpr19 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec + ; GCN: $agpr18 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; GCN: $agpr17 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + ; GCN: $agpr16 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec + S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 +... # Using last vgpr255 will raise error about absence of emergency spill slot. 
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll index be58cb8873f9c..09f9abc23a6f7 100644 --- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll @@ -105,8 +105,8 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* ; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1: ; GFX9: v_pk_sub_u16 v{{[0-9]+}}, v{{[0-9]+}}, 1 op_sel_hi:[1,0]{{$}} -; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1 -; VI: flat_load_dword [[LOAD:v[0-9]+]] +; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1 +; VI-DAG: flat_load_dword [[LOAD:v[0-9]+]] ; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD]] ; VI: v_or_b32_e32 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 53e3005910cd5..f7d67b9f465ae 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -5059,16 +5059,16 @@ define amdgpu_kernel void @udiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; ; GCN-LABEL: udiv_i64_pow2k_denom: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_lshr_b64 s[4:5], s[6:7], 12 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_lshr_b64 s[0:1], s[2:3], 12 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm %r = 
udiv i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out @@ -5703,20 +5703,20 @@ define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; ; GCN-LABEL: sdiv_i64_pow2k_denom: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i32 s4, s7, 31 -; GCN-NEXT: s_lshr_b32 s4, s4, 20 -; GCN-NEXT: s_add_u32 s4, s6, s4 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_addc_u32 s5, s7, 0 -; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], 12 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_ashr_i32 s0, s3, 31 +; GCN-NEXT: s_lshr_b32 s0, s0, 20 +; GCN-NEXT: s_add_u32 s0, s2, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_addc_u32 s1, s3, 0 +; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], 12 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm %r = sdiv i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll b/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll index 8f4f1c3915351..3d75eca93cb48 100644 --- a/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs ; TRAP-HANDLER-ENABLE: NumSgprs: 61 -; TRAP-HANDLER-DISABLE: NumSgprs: 77 +; TRAP-HANDLER-DISABLE: NumSgprs: 79 define amdgpu_kernel void @amdhsa_trap_num_sgprs( i32 addrspace(1)* %out0, i32 %in0, i32 addrspace(1)* %out1, i32 %in1, diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 
284da9da36ee4..a13320bea7a13 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -124,7 +124,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB0_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 @@ -156,7 +156,7 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB0_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s3 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 @@ -298,7 +298,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB1_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mul_lo_u32 v0, s0, v0 @@ -334,7 +334,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB1_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mul_lo_u32 v0, s0, v0 @@ -520,7 +520,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB2_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: 
v_mov_b32_e32 v0, v1 @@ -572,7 +572,7 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB2_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -759,7 +759,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB3_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -811,7 +811,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB3_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -998,7 +998,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB4_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -1050,7 +1050,7 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB4_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -1194,7 +1194,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; 
GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB5_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v2 @@ -1228,7 +1228,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB5_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v2 @@ -1406,7 +1406,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB6_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mul_lo_u32 v3, s3, v0 @@ -1449,7 +1449,7 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB6_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mul_lo_u32 v3, s3, v0 @@ -1675,7 +1675,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB8_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1064-NEXT: v_mul_u32_u24_e32 v0, 5, v0 @@ -1708,7 +1708,7 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB8_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: 
s_or_b32 exec_lo, exec_lo, s3 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1032-NEXT: v_mul_u32_u24_e32 v0, 5, v0 @@ -1851,7 +1851,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB9_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mul_lo_u32 v0, s0, v0 @@ -1887,7 +1887,7 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB9_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mul_lo_u32 v0, s0, v0 @@ -2073,7 +2073,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB10_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -2125,7 +2125,7 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB10_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -2271,7 +2271,7 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB11_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1064-NEXT: v_mul_u32_u24_e32 v1, 5, v0 @@ -2307,7 +2307,7 @@ define 
amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB11_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v1 ; GFX1032-NEXT: v_mul_u32_u24_e32 v1, 5, v0 @@ -2487,7 +2487,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB12_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mul_lo_u32 v3, s3, v0 @@ -2530,7 +2530,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB12_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mul_lo_u32 v3, s3, v0 @@ -2808,7 +2808,7 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB14_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -2859,7 +2859,7 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB14_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -3046,7 +3046,7 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: 
BB15_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -3098,7 +3098,7 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB15_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -3285,7 +3285,7 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB16_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -3337,7 +3337,7 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB16_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -3521,7 +3521,7 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB17_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -3572,7 +3572,7 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB17_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; 
GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -3719,7 +3719,7 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB18_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 @@ -3754,7 +3754,7 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB18_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 @@ -3941,7 +3941,7 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB19_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -3992,7 +3992,7 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB19_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -4139,7 +4139,7 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB20_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 @@ -4174,7 +4174,7 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: 
buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB20_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 @@ -4364,7 +4364,7 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB21_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -4416,7 +4416,7 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB21_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -4560,7 +4560,7 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB22_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 @@ -4595,7 +4595,7 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB22_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 @@ -4782,7 +4782,7 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB23_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1064-NEXT: v_mov_b32_e32 v0, v1 @@ -4833,7 +4833,7 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB23_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 @@ -4977,7 +4977,7 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv ; GFX1064-NEXT: BB24_2: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 @@ -5012,7 +5012,7 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv ; GFX1032-NEXT: BB24_2: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll index a4094573f8abb..b9ad02a77bdf4 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll @@ -130,7 +130,7 @@ define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i ; GFX1064-NEXT: v_mul_u32_u24_e64 v1, s12, 5 ; GFX1064-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc ; GFX1064-NEXT: BB0_3: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[30:31] ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: v_readfirstlane_b32 s4, v1 @@ -164,7 +164,7 @@ define 
amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %i ; GFX1032-NEXT: v_mul_u32_u24_e64 v1, s10, 5 ; GFX1032-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc ; GFX1032-NEXT: BB0_3: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: v_readfirstlane_b32 s4, v1 @@ -364,7 +364,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in ; GFX1064-NEXT: v_mov_b32_e32 v0, s12 ; GFX1064-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc ; GFX1064-NEXT: BB1_3: -; GFX1064-NEXT: v_nop +; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[30:31] ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: v_readfirstlane_b32 s4, v0 @@ -418,7 +418,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in ; GFX1032-NEXT: v_mov_b32_e32 v0, s10 ; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc ; GFX1032-NEXT: BB1_3: -; GFX1032-NEXT: v_nop +; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: v_readfirstlane_b32 s4, v0 diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll index 2fb68bda542fa..6d2106442ddca 100644 --- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll +++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll @@ -339,35 +339,34 @@ define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrsp ; SI-NEXT: v_mov_b32_e32 v1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 -; SI-NEXT: s_mov_b32 s6, 0xff00ff -; SI-NEXT: s_mov_b32 s8, 0xf0f0f0f -; SI-NEXT: s_mov_b32 s9, 0xf0f0f0f0 -; SI-NEXT: s_mov_b32 s10, 0x33333333 -; SI-NEXT: s_mov_b32 s11, 0xcccccccc -; SI-NEXT: s_mov_b32 s0, 0x55555555 -; SI-NEXT: s_mov_b32 s1, 0xaaaaaaaa +; SI-NEXT: s_mov_b32 s0, 0xff00ff +; SI-NEXT: s_mov_b32 s1, 0xf0f0f0f +; SI-NEXT: 
s_mov_b32 s2, 0xf0f0f0f0 +; SI-NEXT: s_mov_b32 s3, 0x33333333 +; SI-NEXT: s_mov_b32 s6, 0xcccccccc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_alignbit_b32 v2, v0, v0, 8 ; SI-NEXT: v_alignbit_b32 v0, v0, v0, 24 ; SI-NEXT: v_alignbit_b32 v3, v1, v1, 8 ; SI-NEXT: v_alignbit_b32 v1, v1, v1, 24 -; SI-NEXT: v_bfi_b32 v2, s6, v0, v2 -; SI-NEXT: v_bfi_b32 v4, s6, v1, v3 -; SI-NEXT: v_and_b32_e32 v1, s8, v2 -; SI-NEXT: v_and_b32_e32 v0, s8, v4 -; SI-NEXT: v_and_b32_e32 v3, s9, v2 -; SI-NEXT: v_and_b32_e32 v2, s9, v4 +; SI-NEXT: v_bfi_b32 v2, s0, v0, v2 +; SI-NEXT: v_bfi_b32 v4, s0, v1, v3 +; SI-NEXT: v_and_b32_e32 v1, s1, v2 +; SI-NEXT: v_and_b32_e32 v0, s1, v4 +; SI-NEXT: v_and_b32_e32 v3, s2, v2 +; SI-NEXT: v_and_b32_e32 v2, s2, v4 ; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 4 ; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 4 -; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s0, 0x55555555 ; SI-NEXT: v_or_b32_e32 v3, v3, v1 ; SI-NEXT: v_or_b32_e32 v2, v2, v0 -; SI-NEXT: v_and_b32_e32 v1, s10, v3 -; SI-NEXT: v_and_b32_e32 v0, s10, v2 -; SI-NEXT: v_and_b32_e32 v3, s11, v3 -; SI-NEXT: v_and_b32_e32 v2, s11, v2 +; SI-NEXT: v_and_b32_e32 v1, s3, v3 +; SI-NEXT: v_and_b32_e32 v0, s3, v2 +; SI-NEXT: v_and_b32_e32 v3, s6, v3 +; SI-NEXT: v_and_b32_e32 v2, s6, v2 ; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 ; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 2 +; SI-NEXT: s_mov_b32 s1, 0xaaaaaaaa ; SI-NEXT: v_or_b32_e32 v3, v3, v1 ; SI-NEXT: v_or_b32_e32 v2, v2, v0 ; SI-NEXT: v_and_b32_e32 v1, s0, v3 @@ -376,6 +375,7 @@ define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrsp ; SI-NEXT: v_and_b32_e32 v2, s1, v2 ; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 ; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 1 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_or_b32_e32 v1, v3, v1 ; SI-NEXT: v_or_b32_e32 v0, v2, v0 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -386,33 +386,33 @@ define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrsp ; FLAT-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; 
FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c ; FLAT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; FLAT-NEXT: s_mov_b32 s6, 0x10203 -; FLAT-NEXT: s_mov_b32 s2, 0x33333333 -; FLAT-NEXT: s_mov_b32 s3, 0xcccccccc +; FLAT-NEXT: s_mov_b32 s2, 0xf0f0f0f0 +; FLAT-NEXT: s_mov_b32 s3, 0x33333333 +; FLAT-NEXT: s_mov_b32 s6, 0xcccccccc ; FLAT-NEXT: s_waitcnt lgkmcnt(0) ; FLAT-NEXT: v_mov_b32_e32 v1, s1 ; FLAT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 ; FLAT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; FLAT-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; FLAT-NEXT: s_mov_b32 s0, 0xf0f0f0f -; FLAT-NEXT: s_mov_b32 s1, 0xf0f0f0f0 +; FLAT-NEXT: s_mov_b32 s0, 0x10203 +; FLAT-NEXT: s_mov_b32 s1, 0xf0f0f0f ; FLAT-NEXT: s_mov_b32 s7, 0xf000 ; FLAT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; FLAT-NEXT: v_perm_b32 v2, 0, v0, s6 -; FLAT-NEXT: v_perm_b32 v4, 0, v1, s6 -; FLAT-NEXT: v_and_b32_e32 v1, s0, v2 -; FLAT-NEXT: v_and_b32_e32 v0, s0, v4 -; FLAT-NEXT: v_and_b32_e32 v3, s1, v2 -; FLAT-NEXT: v_and_b32_e32 v2, s1, v4 +; FLAT-NEXT: v_perm_b32 v2, 0, v0, s0 +; FLAT-NEXT: v_perm_b32 v4, 0, v1, s0 +; FLAT-NEXT: v_and_b32_e32 v1, s1, v2 +; FLAT-NEXT: v_and_b32_e32 v0, s1, v4 +; FLAT-NEXT: v_and_b32_e32 v3, s2, v2 +; FLAT-NEXT: v_and_b32_e32 v2, s2, v4 ; FLAT-NEXT: v_lshlrev_b64 v[0:1], 4, v[0:1] ; FLAT-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] ; FLAT-NEXT: s_mov_b32 s0, 0x55555555 ; FLAT-NEXT: v_or_b32_e32 v3, v3, v1 ; FLAT-NEXT: v_or_b32_e32 v2, v2, v0 -; FLAT-NEXT: v_and_b32_e32 v1, s2, v3 -; FLAT-NEXT: v_and_b32_e32 v0, s2, v2 -; FLAT-NEXT: v_and_b32_e32 v3, s3, v3 -; FLAT-NEXT: v_and_b32_e32 v2, s3, v2 +; FLAT-NEXT: v_and_b32_e32 v1, s3, v3 +; FLAT-NEXT: v_and_b32_e32 v0, s3, v2 +; FLAT-NEXT: v_and_b32_e32 v3, s6, v3 +; FLAT-NEXT: v_and_b32_e32 v2, s6, v2 ; FLAT-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; FLAT-NEXT: v_lshrrev_b64 v[2:3], 2, v[2:3] ; FLAT-NEXT: s_mov_b32 s1, 0xaaaaaaaa @@ -600,13 +600,13 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; SI-NEXT: v_mov_b32_e32 v1, 0 ; 
SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64 -; SI-NEXT: s_mov_b32 s8, 0xff00ff -; SI-NEXT: s_mov_b32 s9, 0xf0f0f0f -; SI-NEXT: s_mov_b32 s10, 0xf0f0f0f0 -; SI-NEXT: s_mov_b32 s11, 0x33333333 -; SI-NEXT: s_mov_b32 s12, 0xcccccccc -; SI-NEXT: s_mov_b32 s13, 0x55555555 -; SI-NEXT: s_mov_b32 s14, 0xaaaaaaaa +; SI-NEXT: s_mov_b32 s0, 0xff00ff +; SI-NEXT: s_mov_b32 s1, 0xf0f0f0f +; SI-NEXT: s_mov_b32 s2, 0xf0f0f0f0 +; SI-NEXT: s_mov_b32 s3, 0x33333333 +; SI-NEXT: s_mov_b32 s8, 0xcccccccc +; SI-NEXT: s_mov_b32 s9, 0x55555555 +; SI-NEXT: s_mov_b32 s10, 0xaaaaaaaa ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_alignbit_b32 v4, v2, v2, 8 @@ -617,18 +617,18 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; SI-NEXT: v_alignbit_b32 v7, v1, v1, 8 ; SI-NEXT: v_alignbit_b32 v1, v1, v1, 24 ; SI-NEXT: v_alignbit_b32 v3, v3, v3, 24 -; SI-NEXT: v_bfi_b32 v2, s8, v2, v4 -; SI-NEXT: v_bfi_b32 v4, s8, v3, v5 -; SI-NEXT: v_bfi_b32 v6, s8, v0, v6 -; SI-NEXT: v_bfi_b32 v8, s8, v1, v7 -; SI-NEXT: v_and_b32_e32 v1, s9, v2 -; SI-NEXT: v_and_b32_e32 v0, s9, v4 -; SI-NEXT: v_and_b32_e32 v3, s10, v2 -; SI-NEXT: v_and_b32_e32 v2, s10, v4 -; SI-NEXT: v_and_b32_e32 v5, s9, v6 -; SI-NEXT: v_and_b32_e32 v4, s9, v8 -; SI-NEXT: v_and_b32_e32 v7, s10, v6 -; SI-NEXT: v_and_b32_e32 v6, s10, v8 +; SI-NEXT: v_bfi_b32 v2, s0, v2, v4 +; SI-NEXT: v_bfi_b32 v4, s0, v3, v5 +; SI-NEXT: v_bfi_b32 v6, s0, v0, v6 +; SI-NEXT: v_bfi_b32 v8, s0, v1, v7 +; SI-NEXT: v_and_b32_e32 v1, s1, v2 +; SI-NEXT: v_and_b32_e32 v0, s1, v4 +; SI-NEXT: v_and_b32_e32 v3, s2, v2 +; SI-NEXT: v_and_b32_e32 v2, s2, v4 +; SI-NEXT: v_and_b32_e32 v5, s1, v6 +; SI-NEXT: v_and_b32_e32 v4, s1, v8 +; SI-NEXT: v_and_b32_e32 v7, s2, v6 +; SI-NEXT: v_and_b32_e32 v6, s2, v8 ; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 4 ; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 4 ; SI-NEXT: v_lshl_b64 v[4:5], v[4:5], 4 @@ -637,14 +637,14 @@ define amdgpu_kernel void 
@v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; SI-NEXT: v_or_b32_e32 v2, v2, v0 ; SI-NEXT: v_or_b32_e32 v7, v7, v5 ; SI-NEXT: v_or_b32_e32 v6, v6, v4 -; SI-NEXT: v_and_b32_e32 v1, s11, v3 -; SI-NEXT: v_and_b32_e32 v0, s11, v2 -; SI-NEXT: v_and_b32_e32 v5, s11, v7 -; SI-NEXT: v_and_b32_e32 v4, s11, v6 -; SI-NEXT: v_and_b32_e32 v3, s12, v3 -; SI-NEXT: v_and_b32_e32 v2, s12, v2 -; SI-NEXT: v_and_b32_e32 v7, s12, v7 -; SI-NEXT: v_and_b32_e32 v6, s12, v6 +; SI-NEXT: v_and_b32_e32 v1, s3, v3 +; SI-NEXT: v_and_b32_e32 v0, s3, v2 +; SI-NEXT: v_and_b32_e32 v5, s3, v7 +; SI-NEXT: v_and_b32_e32 v4, s3, v6 +; SI-NEXT: v_and_b32_e32 v3, s8, v3 +; SI-NEXT: v_and_b32_e32 v2, s8, v2 +; SI-NEXT: v_and_b32_e32 v7, s8, v7 +; SI-NEXT: v_and_b32_e32 v6, s8, v6 ; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 ; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 2 ; SI-NEXT: v_lshl_b64 v[4:5], v[4:5], 2 @@ -653,14 +653,14 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; SI-NEXT: v_or_b32_e32 v2, v2, v0 ; SI-NEXT: v_or_b32_e32 v7, v7, v5 ; SI-NEXT: v_or_b32_e32 v6, v6, v4 -; SI-NEXT: v_and_b32_e32 v1, s13, v3 -; SI-NEXT: v_and_b32_e32 v0, s13, v2 -; SI-NEXT: v_and_b32_e32 v5, s13, v7 -; SI-NEXT: v_and_b32_e32 v4, s13, v6 -; SI-NEXT: v_and_b32_e32 v3, s14, v3 -; SI-NEXT: v_and_b32_e32 v2, s14, v2 -; SI-NEXT: v_and_b32_e32 v7, s14, v7 -; SI-NEXT: v_and_b32_e32 v6, s14, v6 +; SI-NEXT: v_and_b32_e32 v1, s9, v3 +; SI-NEXT: v_and_b32_e32 v0, s9, v2 +; SI-NEXT: v_and_b32_e32 v5, s9, v7 +; SI-NEXT: v_and_b32_e32 v4, s9, v6 +; SI-NEXT: v_and_b32_e32 v3, s10, v3 +; SI-NEXT: v_and_b32_e32 v2, s10, v2 +; SI-NEXT: v_and_b32_e32 v7, s10, v7 +; SI-NEXT: v_and_b32_e32 v6, s10, v6 ; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 ; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 1 ; SI-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 @@ -677,33 +677,33 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; FLAT-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; FLAT-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x2c ; FLAT-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; FLAT-NEXT: s_mov_b32 s10, 0x10203 -; FLAT-NEXT: s_mov_b32 s2, 0x33333333 -; FLAT-NEXT: s_mov_b32 s3, 0xcccccccc +; FLAT-NEXT: s_mov_b32 s2, 0xf0f0f0f0 +; FLAT-NEXT: s_mov_b32 s3, 0x33333333 +; FLAT-NEXT: s_mov_b32 s8, 0xcccccccc ; FLAT-NEXT: s_waitcnt lgkmcnt(0) ; FLAT-NEXT: v_mov_b32_e32 v1, s1 ; FLAT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 ; FLAT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; FLAT-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; FLAT-NEXT: s_mov_b32 s0, 0xf0f0f0f -; FLAT-NEXT: s_mov_b32 s1, 0xf0f0f0f0 -; FLAT-NEXT: s_mov_b32 s8, 0x55555555 -; FLAT-NEXT: s_mov_b32 s9, 0xaaaaaaaa +; FLAT-NEXT: s_mov_b32 s0, 0x10203 +; FLAT-NEXT: s_mov_b32 s1, 0xf0f0f0f +; FLAT-NEXT: s_mov_b32 s9, 0x55555555 +; FLAT-NEXT: s_mov_b32 s10, 0xaaaaaaaa ; FLAT-NEXT: s_mov_b32 s7, 0xf000 ; FLAT-NEXT: s_mov_b32 s6, -1 ; FLAT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; FLAT-NEXT: v_perm_b32 v6, 0, v0, s10 -; FLAT-NEXT: v_perm_b32 v4, 0, v3, s10 -; FLAT-NEXT: v_perm_b32 v2, 0, v2, s10 -; FLAT-NEXT: v_perm_b32 v8, 0, v1, s10 -; FLAT-NEXT: v_and_b32_e32 v1, s0, v2 -; FLAT-NEXT: v_and_b32_e32 v0, s0, v4 -; FLAT-NEXT: v_and_b32_e32 v3, s1, v2 -; FLAT-NEXT: v_and_b32_e32 v2, s1, v4 -; FLAT-NEXT: v_and_b32_e32 v5, s0, v6 -; FLAT-NEXT: v_and_b32_e32 v4, s0, v8 -; FLAT-NEXT: v_and_b32_e32 v7, s1, v6 -; FLAT-NEXT: v_and_b32_e32 v6, s1, v8 +; FLAT-NEXT: v_perm_b32 v6, 0, v0, s0 +; FLAT-NEXT: v_perm_b32 v4, 0, v3, s0 +; FLAT-NEXT: v_perm_b32 v2, 0, v2, s0 +; FLAT-NEXT: v_perm_b32 v8, 0, v1, s0 +; FLAT-NEXT: v_and_b32_e32 v1, s1, v2 +; FLAT-NEXT: v_and_b32_e32 v0, s1, v4 +; FLAT-NEXT: v_and_b32_e32 v3, s2, v2 +; FLAT-NEXT: v_and_b32_e32 v2, s2, v4 +; FLAT-NEXT: v_and_b32_e32 v5, s1, v6 +; FLAT-NEXT: v_and_b32_e32 v4, s1, v8 +; FLAT-NEXT: v_and_b32_e32 v7, s2, v6 +; FLAT-NEXT: v_and_b32_e32 v6, s2, v8 ; FLAT-NEXT: v_lshlrev_b64 v[0:1], 4, v[0:1] ; FLAT-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] ; FLAT-NEXT: v_lshlrev_b64 v[4:5], 4, v[4:5] @@ -712,14 +712,14 
@@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; FLAT-NEXT: v_or_b32_e32 v2, v2, v0 ; FLAT-NEXT: v_or_b32_e32 v7, v7, v5 ; FLAT-NEXT: v_or_b32_e32 v6, v6, v4 -; FLAT-NEXT: v_and_b32_e32 v1, s2, v3 -; FLAT-NEXT: v_and_b32_e32 v0, s2, v2 -; FLAT-NEXT: v_and_b32_e32 v5, s2, v7 -; FLAT-NEXT: v_and_b32_e32 v4, s2, v6 -; FLAT-NEXT: v_and_b32_e32 v3, s3, v3 -; FLAT-NEXT: v_and_b32_e32 v2, s3, v2 -; FLAT-NEXT: v_and_b32_e32 v7, s3, v7 -; FLAT-NEXT: v_and_b32_e32 v6, s3, v6 +; FLAT-NEXT: v_and_b32_e32 v1, s3, v3 +; FLAT-NEXT: v_and_b32_e32 v0, s3, v2 +; FLAT-NEXT: v_and_b32_e32 v5, s3, v7 +; FLAT-NEXT: v_and_b32_e32 v4, s3, v6 +; FLAT-NEXT: v_and_b32_e32 v3, s8, v3 +; FLAT-NEXT: v_and_b32_e32 v2, s8, v2 +; FLAT-NEXT: v_and_b32_e32 v7, s8, v7 +; FLAT-NEXT: v_and_b32_e32 v6, s8, v6 ; FLAT-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; FLAT-NEXT: v_lshrrev_b64 v[2:3], 2, v[2:3] ; FLAT-NEXT: v_lshlrev_b64 v[4:5], 2, v[4:5] @@ -728,14 +728,14 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 ; FLAT-NEXT: v_or_b32_e32 v2, v2, v0 ; FLAT-NEXT: v_or_b32_e32 v7, v7, v5 ; FLAT-NEXT: v_or_b32_e32 v6, v6, v4 -; FLAT-NEXT: v_and_b32_e32 v1, s8, v3 -; FLAT-NEXT: v_and_b32_e32 v0, s8, v2 -; FLAT-NEXT: v_and_b32_e32 v5, s8, v7 -; FLAT-NEXT: v_and_b32_e32 v4, s8, v6 -; FLAT-NEXT: v_and_b32_e32 v3, s9, v3 -; FLAT-NEXT: v_and_b32_e32 v2, s9, v2 -; FLAT-NEXT: v_and_b32_e32 v7, s9, v7 -; FLAT-NEXT: v_and_b32_e32 v6, s9, v6 +; FLAT-NEXT: v_and_b32_e32 v1, s9, v3 +; FLAT-NEXT: v_and_b32_e32 v0, s9, v2 +; FLAT-NEXT: v_and_b32_e32 v5, s9, v7 +; FLAT-NEXT: v_and_b32_e32 v4, s9, v6 +; FLAT-NEXT: v_and_b32_e32 v3, s10, v3 +; FLAT-NEXT: v_and_b32_e32 v2, s10, v2 +; FLAT-NEXT: v_and_b32_e32 v7, s10, v7 +; FLAT-NEXT: v_and_b32_e32 v6, s10, v6 ; FLAT-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; FLAT-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] ; FLAT-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll 
b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll index 8c6b94da79cf8..1125dbb75c56b 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -482,13 +482,10 @@ ret: ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( ; GCN-NEXT: s_addc_u32 ; GCN-NEXT: s_setpc_b64 - ; GCN-NEXT: [[LONG_BR_0]]: -; GCN: s_setpc_b64 -; GCN: [[LONG_BR_DEST0]] +; GCN: [[LONG_BR_DEST0]]: -; GCN: s_cbranch_vccnz ; GCN-DAG: v_cmp_lt_i32 ; GCN-DAG: v_cmp_ge_i32 diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll index 74fe04bcf3473..1cdd6f4e37105 100644 --- a/llvm/test/CodeGen/AMDGPU/bswap.ll +++ b/llvm/test/CodeGen/AMDGPU/bswap.ll @@ -33,17 +33,17 @@ define amdgpu_kernel void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace( ; ; VI-LABEL: test_bswap_i32: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v0, 0x10203 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_load_dword s0, s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_perm_b32 v0, 0, s4, v0 -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: v_perm_b32 v0, 0, s0, v0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm %val = load i32, i32 addrspace(1)* %in, align 4 %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone @@ -72,18 +72,18 @@ define amdgpu_kernel void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; ; VI-LABEL: test_bswap_v2i32: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v0, 0x10203 -; VI-NEXT: s_mov_b32 
s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_perm_b32 v1, 0, s5, v0 -; VI-NEXT: v_perm_b32 v0, 0, s4, v0 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: v_perm_b32 v1, 0, s3, v0 +; VI-NEXT: v_perm_b32 v0, 0, s2, v0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8 %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone @@ -123,14 +123,14 @@ define amdgpu_kernel void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_perm_b32 v3, 0, s7, v0 -; VI-NEXT: v_perm_b32 v2, 0, s6, v0 -; VI-NEXT: v_perm_b32 v1, 0, s5, v0 -; VI-NEXT: v_perm_b32 v0, 0, s4, v0 +; VI-NEXT: v_perm_b32 v3, 0, s11, v0 +; VI-NEXT: v_perm_b32 v2, 0, s10, v0 +; VI-NEXT: v_perm_b32 v1, 0, s9, v0 +; VI-NEXT: v_perm_b32 v0, 0, s8, v0 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16 @@ -226,18 +226,18 @@ define amdgpu_kernel void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace( ; ; VI-LABEL: test_bswap_i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v0, 0x10203 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: 
s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_perm_b32 v1, 0, s4, v0 -; VI-NEXT: v_perm_b32 v0, 0, s5, v0 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: v_perm_b32 v1, 0, s2, v0 +; VI-NEXT: v_perm_b32 v0, 0, s3, v0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm %val = load i64, i64 addrspace(1)* %in, align 8 %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone @@ -277,14 +277,14 @@ define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_perm_b32 v3, 0, s6, v0 -; VI-NEXT: v_perm_b32 v2, 0, s7, v0 -; VI-NEXT: v_perm_b32 v1, 0, s4, v0 -; VI-NEXT: v_perm_b32 v0, 0, s5, v0 +; VI-NEXT: v_perm_b32 v3, 0, s10, v0 +; VI-NEXT: v_perm_b32 v2, 0, s11, v0 +; VI-NEXT: v_perm_b32 v1, 0, s8, v0 +; VI-NEXT: v_perm_b32 v0, 0, s9, v0 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll index b99e82b312ee2..04c8a2e9aa36a 100644 --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -5,7 +5,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GFX1010 %s -; GCN-ISEL-LABEL: name: sadd64rr +; GCN-ISEL-LABEL: name: sadd64rr ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: S_ADD_U64_PSEUDO @@ -20,7 +20,7 @@ entry: ret void } -; GCN-ISEL-LABEL: name: sadd64ri +; GCN-ISEL-LABEL: name: sadd64ri ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: S_ADD_U64_PSEUDO @@ -35,7 +35,7 @@ entry: ret void } -; GCN-ISEL-LABEL: name: vadd64rr +; GCN-ISEL-LABEL: name: vadd64rr ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: V_ADD_U64_PSEUDO @@ -62,7 +62,7 @@ entry: ret void } -; GCN-ISEL-LABEL: name: vadd64ri +; GCN-ISEL-LABEL: name: vadd64ri ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: V_ADD_U64_PSEUDO @@ -77,9 +77,9 @@ entry: ; VI: v_mov_b32_e32 v1, 0x1234 ; VI: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; -; GFX9: v_add_co_u32_e32 v0, vcc, 0x56789876, v0 -; GFX9: v_mov_b32_e32 v1, 0x1234 -; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9: v_add_co_u32_e32 v0, vcc, 0x56789876, v0 +; GFX9: v_mov_b32_e32 v1, 0x1234 +; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; ; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} ; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]] @@ -108,7 +108,7 @@ define amdgpu_kernel void @suaddo32(i32 addrspace(1)* %out, i1 addrspace(1)* %ca ; GCN-ISEL-LABEL: name: uaddo32_vcc_user ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0 -; GCN-ISEL: V_ADD_I32_e64 +; GCN-ISEL: V_ADD_CO_U32_e64 ; below we check selection to v_add/addc ; because the only user of VCC produced by the UADDOis v_cndmask. 
@@ -190,7 +190,7 @@ define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %ca ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s -; GCN-ISEL-LABEL: name: ssub64rr +; GCN-ISEL-LABEL: name: ssub64rr ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: S_SUB_U64_PSEUDO @@ -205,7 +205,7 @@ entry: ret void } -; GCN-ISEL-LABEL: name: ssub64ri +; GCN-ISEL-LABEL: name: ssub64ri ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: S_SUB_U64_PSEUDO @@ -220,7 +220,7 @@ entry: ret void } -; GCN-ISEL-LABEL: name: vsub64rr +; GCN-ISEL-LABEL: name: vsub64rr ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: V_SUB_U64_PSEUDO @@ -247,7 +247,7 @@ entry: ret void } -; GCN-ISEL-LABEL: name: vsub64ri +; GCN-ISEL-LABEL: name: vsub64ri ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0.entry: ; GCN-ISEL: V_SUB_U64_PSEUDO @@ -262,9 +262,9 @@ entry: ; VI: v_mov_b32_e32 v1, 0x1234 ; VI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; -; GFX9: v_sub_co_u32_e32 v0, vcc, 0x56789876, v0 -; GFX9: v_mov_b32_e32 v1, 0x1234 -; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9: v_sub_co_u32_e32 v0, vcc, 0x56789876, v0 +; GFX9: v_mov_b32_e32 v1, 0x1234 +; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc ; ; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} ; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]] @@ -293,7 +293,7 @@ define amdgpu_kernel void @susubo32(i32 addrspace(1)* %out, i1 addrspace(1)* %ca ; GCN-ISEL-LABEL: name: usubo32_vcc_user ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0 -; GCN-ISEL: V_SUB_I32_e64 +; GCN-ISEL: V_SUB_CO_U32_e64 ; below we check selection to v_sub/subb ; because the only user of VCC produced by the USUBOis v_cndmask. 
@@ -371,9 +371,9 @@ define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %ca ; GCN-ISEL-LABEL: name: sudiv64 ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.3 -; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 +; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 ; GCN-ISEL: S_ADD_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]] -; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 +; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 ; GCN-ISEL: S_SUB_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]] define amdgpu_kernel void @sudiv64(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = udiv i64 %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll index dcd3b63f10ce9..a3727cee15705 100644 --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -386,7 +386,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 { ; GFX1010-NEXT: ; implicit-def: $vcc_hi ; GFX1010-NEXT: s_waitcnt vmcnt(0) ; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX1010-NEXT: v_nop +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1010-NEXT: s_mov_b32 s6, 0x20000 ; GFX1010-NEXT: ;;#ASMSTART ; GFX1010-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index a85cdcc01922d..da86b8104b8ec 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -199,17 +199,17 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly % ; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s5 -; GCN-NEXT: global_load_ushort v4, v[2:3], off -; GCN-NEXT: v_mov_b32_e32 v0, s6 -; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: global_load_ushort 
v2, v[0:1], off ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_short v4, off, s[0:3], 0 offset:4 -; GCN-NEXT: global_load_ushort v4, v[2:3], off offset:2 +; GCN-NEXT: buffer_store_short v2, off, s[0:3], 0 offset:4 +; GCN-NEXT: global_load_ushort v2, v[0:1], off offset:2 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_short v4, off, s[0:3], 0 offset:6 -; GCN-NEXT: global_load_ushort v2, v[2:3], off offset:4 +; GCN-NEXT: buffer_store_short v2, off, s[0:3], 0 offset:6 +; GCN-NEXT: global_load_ushort v2, v[0:1], off offset:4 +; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: v_mov_b32_e32 v1, s7 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v2, off, s[0:3], 0 offset:8 ; GCN-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir index 3970c9fdf193a..64023cdd525f0 100644 --- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir +++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir @@ -22,7 +22,7 @@ body: | $vgpr2 = IMPLICIT_DEF $vgpr3 = IMPLICIT_DEF $vgpr6 = IMPLICIT_DEF - $vgpr0 = V_ADD_I32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec + $vgpr0 = V_ADD_CO_U32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec $vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) diff --git a/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir b/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir new file mode 100644 index 0000000000000..e7bf09ab49b2a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/cmp_shrink.mir @@ -0,0 +1,11 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions 
-verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: not_shrink_icmp +body: | + bb.0: + ; GCN-LABEL: name: not_shrink_icmp + ; GCN: S_CMP_GT_I32 1, 65, implicit-def $scc + S_CMP_GT_I32 1, 65, implicit-def $scc +... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir index 7fff7ca70dc74..0a0928a51813a 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir @@ -104,7 +104,7 @@ body: | bb.11: successors: %bb.2, %bb.1 - %42:vgpr_32 = V_ADD_I32_e32 32, %9, implicit-def dead $vcc, implicit $exec + %42:vgpr_32 = V_ADD_CO_U32_e32 32, %9, implicit-def dead $vcc, implicit $exec V_CMP_EQ_U32_e32 0, %42, implicit-def $vcc, implicit $exec %43:vgpr_32 = COPY %42 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir b/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir index cd4a851bc3ea5..71e5ec7d52ed3 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir @@ -64,19 +64,19 @@ body: | %36:vreg_128 = COPY killed %44 %0:sreg_64 = COPY killed %43 %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec - %41:vgpr_32 = V_ADD_I32_e32 1152, %39, implicit-def dead $vcc, implicit $exec + %41:vgpr_32 = V_ADD_CO_U32_e32 1152, %39, implicit-def dead $vcc, implicit $exec $m0 = S_MOV_B32 -1 %12:vreg_64 = DS_READ2_B32 killed %41, 0, 1, 0, implicit $m0, implicit $exec %13:vreg_64 = DS_READ2_B32 %39, -112, -111, 0, implicit $m0, implicit $exec %14:vreg_64 = DS_READ2_B32 %39, 0, 1, 0, implicit $m0, implicit $exec - %40:vgpr_32 = V_ADD_I32_e32 1160, %39, implicit-def dead $vcc, implicit $exec + %40:vgpr_32 = V_ADD_CO_U32_e32 1160, %39, implicit-def dead $vcc, implicit $exec %15:vreg_64 = DS_READ2_B32 killed %40, 
0, 1, 0, implicit $m0, implicit $exec %16:vreg_64 = DS_READ2_B32 %39, -110, -109, 0, implicit $m0, implicit $exec %17:vreg_64 = DS_READ2_B32 %39, 2, 3, 0, implicit $m0, implicit $exec undef %35.sub1:vreg_128 = COPY undef %34 %31:vreg_128 = COPY killed %29 %31.sub1:vreg_128 = COPY %34 - %38:vgpr_32 = V_ADD_I32_e32 1, %36.sub0, implicit-def dead $vcc, implicit $exec + %38:vgpr_32 = V_ADD_CO_U32_e32 1, %36.sub0, implicit-def dead $vcc, implicit $exec %18:sreg_64 = V_CMP_LT_I32_e64 5, %38, implicit $exec %1:sreg_64 = S_OR_B64 killed %18, killed %0, implicit-def $scc %30:vreg_128 = COPY %31 diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir index 9219083bb64ce..666dfd74c5008 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir @@ -38,7 +38,7 @@ body: | ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 - ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec + ; GCN: undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_CO_U32_e64 %5.sub0, %6.sub0, 0, implicit $exec ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 @@ -87,7 +87,7 @@ body: | undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 - undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_I32_e64 %5.sub0, %6.sub0, 0, implicit $exec + undef %8.sub0:vreg_64, %9:sreg_64_xexec = V_ADD_CO_U32_e64 %5.sub0, %6.sub0, 0, implicit $exec %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 diff --git 
a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index 5baf4ac94d0aa..c8adce6fed389 100644 --- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -76,7 +76,7 @@ body: | %14:vreg_64 = REG_SEQUENCE %3, %subreg.hi16, %13, %subreg.lo16 %15:vreg_64 = V_LSHLREV_B64 2, killed %14, implicit $exec %5:sreg_32_xm0 = COPY %4.sub1 - %20:vgpr_32 = V_ADD_I32_e32 %4.sub0, %15.sub0, implicit-def $vcc, implicit $exec + %20:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %15.sub0, implicit-def $vcc, implicit $exec %18:vgpr_32 = COPY killed %5 %17:vgpr_32 = V_ADDC_U32_e32 %15.sub1, %18, implicit-def $vcc, implicit $vcc, implicit $exec %19:vreg_64 = REG_SEQUENCE %20, %subreg.hi16, killed %17, %subreg.lo16 @@ -206,7 +206,7 @@ body: | %16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16 %17:vreg_64 = V_LSHLREV_B64 2, killed %16, implicit $exec %9:sreg_32_xm0 = COPY %3.sub1 - %21:vgpr_32 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def $vcc, implicit $exec + %21:vgpr_32 = V_ADD_CO_U32_e32 %3.sub0, %17.sub0, implicit-def $vcc, implicit $exec %19:vgpr_32 = COPY killed %9 %18:vgpr_32 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def $vcc, implicit $vcc, implicit $exec %20:vreg_64 = REG_SEQUENCE %21, %subreg.hi16, killed %18, %subreg.lo16 @@ -330,7 +330,7 @@ body: | %16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16 %17:vreg_64 = V_LSHLREV_B64 2, killed %16, implicit $exec %9:sreg_32_xm0 = COPY %3.sub1 - %21:vgpr_32 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def $vcc, implicit $exec + %21:vgpr_32 = V_ADD_CO_U32_e32 %3.sub0, %17.sub0, implicit-def $vcc, implicit $exec %19:vgpr_32 = COPY killed %9 %18:vgpr_32 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def $vcc, implicit $vcc, implicit $exec %20:vreg_64 = REG_SEQUENCE %21, %subreg.hi16, killed %18, %subreg.lo16 @@ -580,7 +580,7 @@ body: | %14:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %13, %subreg.sub1 %15:vreg_64 = 
V_LSHLREV_B64 2, killed %14, implicit $exec %5:sreg_32_xm0 = COPY %4.sub1 - %20:vgpr_32 = V_ADD_I32_e32 %4.sub0, %15.sub0, implicit-def $vcc, implicit $exec + %20:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %15.sub0, implicit-def $vcc, implicit $exec %18:vgpr_32 = COPY killed %5 %17:vgpr_32 = V_ADDC_U32_e32 %15.sub1, %18, implicit-def $vcc, implicit $vcc, implicit $exec %19:vreg_64 = REG_SEQUENCE %20, %subreg.sub0, killed %17, %subreg.sub1 @@ -787,7 +787,7 @@ body: | %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %15, %subreg.sub1 %17:vreg_64 = V_LSHLREV_B64 2, killed %16, implicit $exec %9:sreg_32_xm0 = COPY %3.sub1 - %21:vgpr_32 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def $vcc, implicit $exec + %21:vgpr_32 = V_ADD_CO_U32_e32 %3.sub0, %17.sub0, implicit-def $vcc, implicit $exec %19:vgpr_32 = COPY killed %9 %18:vgpr_32 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def $vcc, implicit $vcc, implicit $exec %20:vreg_64 = REG_SEQUENCE %21, %subreg.sub0, killed %18, %subreg.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll index 0306177f64231..e7030ab813269 100644 --- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll +++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll @@ -51,24 +51,23 @@ define amdgpu_kernel void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x ; SI-LABEL: test_copy_v4i8_x2: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s11 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s12, s6 -; SI-NEXT: 
s_mov_b32 s13, s7 -; SI-NEXT: s_mov_b32 s14, s2 -; SI-NEXT: s_mov_b32 s15, s3 +; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: s_mov_b32 s1, s7 ; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_copy_v4i8_x2: @@ -78,17 +77,17 @@ define amdgpu_kernel void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s6 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s9, s7 -; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 +; VI-NEXT: s_mov_b32 s8, s6 +; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 @@ -106,28 +105,25 @@ define amdgpu_kernel void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s11, 0xf000 -; SI-NEXT: s_mov_b32 s22, 0 -; SI-NEXT: s_mov_b32 s23, s11 +; SI-NEXT: s_mov_b32 s14, 0 +; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[20:21], s[6:7] +; SI-NEXT: s_mov_b64 s[12:13], s[6:7] ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dword v0, v[0:1], s[20:23], 0 addr64 +; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 ; SI-NEXT: s_mov_b32 s10, -1 ; SI-NEXT: 
s_mov_b32 s8, s0 ; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: s_mov_b32 s12, s2 ; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s14, s10 -; SI-NEXT: s_mov_b32 s15, s11 -; SI-NEXT: s_mov_b32 s16, s4 -; SI-NEXT: s_mov_b32 s17, s5 -; SI-NEXT: s_mov_b32 s18, s10 -; SI-NEXT: s_mov_b32 s19, s11 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 -; SI-NEXT: buffer_store_dword v0, off, s[16:19], 0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_copy_v4i8_x3: @@ -144,17 +140,15 @@ define amdgpu_kernel void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x ; VI-NEXT: flat_load_dword v0, v[0:1] ; VI-NEXT: s_mov_b32 s8, s0 ; VI-NEXT: s_mov_b32 s9, s1 +; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 ; VI-NEXT: s_mov_b32 s12, s2 ; VI-NEXT: s_mov_b32 s13, s3 -; VI-NEXT: s_mov_b32 s15, s11 -; VI-NEXT: s_mov_b32 s16, s4 -; VI-NEXT: s_mov_b32 s17, s5 -; VI-NEXT: s_mov_b32 s18, s10 -; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; VI-NEXT: buffer_store_dword v0, off, s[12:15], 0 -; VI-NEXT: buffer_store_dword v0, off, s[16:19], 0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x @@ -168,68 +162,70 @@ define amdgpu_kernel void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind { ; SI-LABEL: test_copy_v4i8_x4: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x11 -; 
SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s14, 0 -; SI-NEXT: s_mov_b32 s15, s3 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x11 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s20, s8 -; SI-NEXT: s_mov_b32 s21, s9 -; SI-NEXT: s_mov_b32 s8, s10 -; SI-NEXT: s_mov_b32 s9, s11 -; SI-NEXT: s_mov_b32 s16, s6 -; SI-NEXT: s_mov_b32 s17, s7 -; SI-NEXT: s_mov_b32 s18, s2 -; SI-NEXT: s_mov_b32 s19, s3 -; SI-NEXT: s_mov_b32 s22, s2 -; SI-NEXT: s_mov_b32 s23, s3 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 s22, s10 +; SI-NEXT: s_mov_b32 s23, s11 +; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: s_mov_b32 s13, s3 +; SI-NEXT: s_mov_b32 s16, s4 +; SI-NEXT: s_mov_b32 s17, s5 +; SI-NEXT: s_mov_b32 s20, s6 +; SI-NEXT: s_mov_b32 s21, s7 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; SI-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; SI-NEXT: buffer_store_dword v0, off, s[20:23], 0 -; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_copy_v4i8_x4: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x44 +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x44 ; VI-NEXT: 
v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s16, s8 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s17, s9 -; VI-NEXT: s_mov_b32 s8, s10 -; VI-NEXT: s_mov_b32 s9, s11 -; VI-NEXT: s_mov_b32 s12, s6 -; VI-NEXT: s_mov_b32 s13, s7 -; VI-NEXT: s_mov_b32 s14, s2 -; VI-NEXT: s_mov_b32 s15, s3 -; VI-NEXT: s_mov_b32 s18, s2 -; VI-NEXT: s_mov_b32 s19, s3 -; VI-NEXT: s_mov_b32 s10, s2 -; VI-NEXT: s_mov_b32 s11, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 +; VI-NEXT: s_mov_b32 s22, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 +; VI-NEXT: s_mov_b32 s23, s11 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 +; VI-NEXT: s_mov_b32 s20, s6 +; VI-NEXT: s_mov_b32 s21, s7 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; VI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; VI-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x @@ -245,23 +241,22 @@ define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0 ; SI-LABEL: test_copy_v4i8_extra_use: 
; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s11 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_mov_b32 s12, 0xff00 ; SI-NEXT: s_movk_i32 s13, 0xff -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s4, s6 -; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s6, s2 -; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: s_mov_b32 s1, s7 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_add_i32_e32 v3, vcc, 9, v0 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -277,47 +272,47 @@ define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v1, vcc, 0x9000000, v1 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: buffer_store_dword v1, off, s[4:7], 0 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_copy_v4i8_extra_use: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_movk_i32 s10, 0x900 -; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_movk_i32 s12, 0xff00 +; VI-NEXT: s_movk_i32 s13, 0xff +; VI-NEXT: s_movk_i32 s14, 0x900 ; VI-NEXT: s_waitcnt lgkmcnt(0) 
-; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: v_add_u32_e32 v0, vcc, s8, v0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_movk_i32 s8, 0xff00 -; VI-NEXT: s_movk_i32 s9, 0xff -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: s_mov_b32 s0, s4 +; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_mov_b32 s11, s3 +; VI-NEXT: s_mov_b32 s8, s6 +; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; VI-NEXT: v_and_b32_e32 v4, s8, v1 +; VI-NEXT: v_and_b32_e32 v4, s12, v1 ; VI-NEXT: v_add_u16_e32 v1, 9, v1 ; VI-NEXT: v_add_u16_e32 v3, 9, v0 -; VI-NEXT: v_and_b32_e32 v1, s9, v1 +; VI-NEXT: v_and_b32_e32 v1, s13, v1 ; VI-NEXT: v_or_b32_e32 v1, v4, v1 -; VI-NEXT: v_and_b32_e32 v2, s8, v0 -; VI-NEXT: v_and_b32_e32 v3, s9, v3 +; VI-NEXT: v_and_b32_e32 v2, s12, v0 +; VI-NEXT: v_and_b32_e32 v3, s13, v3 ; VI-NEXT: v_or_b32_e32 v2, v2, v3 -; VI-NEXT: v_add_u16_e32 v1, s10, v1 -; VI-NEXT: v_add_u16_e32 v2, s10, v2 +; VI-NEXT: v_add_u16_e32 v1, s14, v1 +; VI-NEXT: v_add_u16_e32 v2, s14, v2 ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_e32 v1, v2, v1 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; VI-NEXT: buffer_store_dword v1, off, s[4:7], 0 +; VI-NEXT: buffer_store_dword v1, off, s[8:11], 0 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x @@ -334,35 +329,32 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s11, 0xf000 -; SI-NEXT: s_mov_b32 s18, 0 -; SI-NEXT: s_mov_b32 s19, 
s11 +; SI-NEXT: s_mov_b32 s14, 0 +; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[16:17], s[6:7] +; SI-NEXT: s_mov_b64 s[12:13], s[6:7] ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dword v0, v[0:1], s[16:19], 0 addr64 -; SI-NEXT: s_mov_b32 s12, s4 -; SI-NEXT: s_mov_b32 s13, s5 -; SI-NEXT: s_mov_b32 s4, 0xff00 -; SI-NEXT: s_movk_i32 s5, 0xff +; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 +; SI-NEXT: s_mov_b32 s16, 0xff00 +; SI-NEXT: s_movk_i32 s17, 0xff ; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s14, s10 ; SI-NEXT: s_mov_b32 s8, s0 ; SI-NEXT: s_mov_b32 s9, s1 -; SI-NEXT: s_mov_b32 s0, s2 -; SI-NEXT: s_mov_b32 s1, s3 -; SI-NEXT: s_mov_b32 s2, s10 -; SI-NEXT: s_mov_b32 s3, s11 -; SI-NEXT: s_mov_b32 s14, s10 -; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: s_mov_b32 s13, s3 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_add_i32_e32 v3, vcc, 9, v0 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; SI-NEXT: v_and_b32_e32 v4, s4, v1 +; SI-NEXT: v_and_b32_e32 v4, s16, v1 ; SI-NEXT: v_add_i32_e32 v1, vcc, 9, v1 -; SI-NEXT: v_and_b32_e32 v2, s4, v0 -; SI-NEXT: v_and_b32_e32 v3, s5, v3 +; SI-NEXT: v_and_b32_e32 v2, s16, v0 +; SI-NEXT: v_and_b32_e32 v3, s17, v3 ; SI-NEXT: v_or_b32_e32 v2, v2, v3 -; SI-NEXT: v_and_b32_e32 v1, s5, v1 +; SI-NEXT: v_and_b32_e32 v1, s17, v1 ; SI-NEXT: v_add_i32_e32 v2, vcc, 0x900, v2 ; SI-NEXT: v_or_b32_e32 v1, v4, v1 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 @@ -370,51 +362,49 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v1, vcc, 0x9000000, v1 ; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 +; SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 +; SI-NEXT: buffer_store_dword 
v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_copy_v4i8_x2_extra_use: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_mov_b32 s11, 0xf000 -; VI-NEXT: s_mov_b32 s10, -1 -; VI-NEXT: s_mov_b32 s14, s10 +; VI-NEXT: s_movk_i32 s16, 0xff00 +; VI-NEXT: s_movk_i32 s17, 0xff +; VI-NEXT: s_movk_i32 s18, 0x900 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 ; VI-NEXT: v_mov_b32_e32 v1, s7 +; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_movk_i32 s4, 0xff00 -; VI-NEXT: s_mov_b32 s13, s5 -; VI-NEXT: s_movk_i32 s5, 0xff -; VI-NEXT: s_movk_i32 s6, 0x900 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s14, s10 +; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_mov_b32 s8, s0 ; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 -; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; VI-NEXT: v_and_b32_e32 v4, s4, v1 +; VI-NEXT: v_and_b32_e32 v4, s16, v1 ; VI-NEXT: v_add_u16_e32 v1, 9, v1 ; VI-NEXT: v_add_u16_e32 v3, 9, v0 -; VI-NEXT: v_and_b32_e32 v1, s5, v1 +; VI-NEXT: v_and_b32_e32 v1, s17, v1 ; VI-NEXT: v_or_b32_e32 v1, v4, v1 -; VI-NEXT: v_and_b32_e32 v2, s4, v0 -; VI-NEXT: v_and_b32_e32 v3, s5, v3 +; VI-NEXT: v_and_b32_e32 v2, s16, v0 +; VI-NEXT: v_and_b32_e32 v3, s17, v3 ; VI-NEXT: v_or_b32_e32 v2, v2, v3 -; VI-NEXT: v_add_u16_e32 v1, s6, v1 -; VI-NEXT: v_add_u16_e32 v2, s6, v2 +; VI-NEXT: v_add_u16_e32 v1, s18, v1 +; VI-NEXT: v_add_u16_e32 v2, s18, v2 ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_e32 v1, v2, v1 ; VI-NEXT: buffer_store_dword 
v0, off, s[8:11], 0 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; VI-NEXT: buffer_store_dword v0, off, s[12:15], 0 +; VI-NEXT: buffer_store_dword v1, off, s[12:15], 0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x @@ -429,18 +419,18 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o define amdgpu_kernel void @test_copy_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind { ; SI-LABEL: test_copy_v3i8_align4: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[6:7] +; SI-NEXT: s_mov_b64 s[4:5], s[10:11] ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 +; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s0, s8 +; SI-NEXT: s_mov_b32 s1, s9 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -449,17 +439,15 @@ define amdgpu_kernel void @test_copy_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 ; ; VI-LABEL: test_copy_v3i8_align4: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 
v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll index c26af864e7fe3..31671b9a15766 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -526,27 +526,27 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c -; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; VI-NEXT: v_mov_b32_e32 v5, 0 -; VI-NEXT: v_mov_b32_e32 v1, 0 +; VI-NEXT: v_lshlrev_b32_e32 v3, 3, v0 +; VI-NEXT: v_mov_b32_e32 v4, 0 +; VI-NEXT: v_mov_b32_e32 v2, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v6, s3 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v0 -; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc -; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] -; VI-NEXT: v_add_u32_e32 v4, vcc, s2, v0 -; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc +; VI-NEXT: v_mov_b32_e32 v5, s3 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v3 +; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; VI-NEXT: v_add_u32_e32 v3, vcc, s2, v3 +; VI-NEXT: v_addc_u32_e32 v4, vcc, v5, v4, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_ffbh_u32_e32 v0, v2 -; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 -; VI-NEXT: v_ffbh_u32_e32 v6, v3 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; VI-NEXT: v_or_b32_e32 v2, v2, v3 -; VI-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; VI-NEXT: v_cndmask_b32_e32 v0, 64, v0, vcc -; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] +; VI-NEXT: v_ffbh_u32_e32 v5, v0 +; VI-NEXT: 
v_add_u32_e32 v5, vcc, 32, v5 +; VI-NEXT: v_ffbh_u32_e32 v6, v1 +; VI-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; VI-NEXT: v_cndmask_b32_e32 v1, 64, v1, vcc +; VI-NEXT: flat_store_dwordx2 v[3:4], v[1:2] ; VI-NEXT: s_endpgm ; ; EG-LABEL: v_ctlz_i64: @@ -621,18 +621,18 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 ; VI-NEXT: v_mov_b32_e32 v2, s1 ; VI-NEXT: v_add_u32_e32 v1, vcc, s0, v1 ; VI-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; VI-NEXT: flat_load_dwordx2 v[1:2], v[1:2] ; VI-NEXT: v_add_u32_e32 v3, vcc, s2, v0 -; VI-NEXT: flat_load_dwordx2 v[0:1], v[1:2] ; VI-NEXT: v_addc_u32_e32 v4, vcc, v5, v4, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_ffbh_u32_e32 v2, v0 -; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2 -; VI-NEXT: v_ffbh_u32_e32 v5, v1 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; VI-NEXT: v_cndmask_b32_e32 v0, 64, v1, vcc +; VI-NEXT: v_ffbh_u32_e32 v0, v1 +; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 +; VI-NEXT: v_ffbh_u32_e32 v5, v2 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; VI-NEXT: v_or_b32_e32 v1, v1, v2 +; VI-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, 64, v0, vcc ; VI-NEXT: flat_store_dword v[3:4], v0 ; VI-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll index 7b4d6f8d2f925..6bc3073eb18e0 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -177,11 +177,11 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* n ; SI-NOSDWA: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] ; SI-NOSDWA: v_ffbl_b32_e32 
v{{[0-9]+}}, [[VAL2]] -; SI-SDWA: v_or_b32_e32 ; SI-SDWA: v_or_b32_sdwa +; SI-SDWA: v_or_b32_e32 +; SI-SDWA: v_or_b32_e32 ; SI-SDWA: v_or_b32_e32 [[VAL1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-SDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] -; SI-SDWA: v_or_b32_e32 ; SI-SDWA: v_or_b32_sdwa ; SI-SDWA: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-SDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]] @@ -198,8 +198,8 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n } ; FUNC-LABEL: {{^}}v_cttz_i32_sel_eq_neg1: -; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]] -; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]] +; SI: v_ffbl_b32_e32 [[VAL:v[0-9]+]], v{{[0-9]+}} +; SI: buffer_store_dword [[VAL]], ; SI: s_endpgm ; EG: MEM_RAT_CACHELESS STORE_RAW ; EG: FFBL_INT @@ -213,8 +213,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out } ; FUNC-LABEL: {{^}}v_cttz_i32_sel_ne_neg1: -; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]] -; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]] +; SI: v_ffbl_b32_e32 [[VAL:v[0-9]+]], v{{[0-9]+}} +; SI: buffer_store_dword [[VAL]], ; SI: s_endpgm ; EG: MEM_RAT_CACHELESS STORE_RAW ; EG: FFBL_INT diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 0d1c61597848c..21360aa85cbc1 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -636,20 +636,19 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_2_uses: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s11, 0xf000 -; SI-NEXT: s_mov_b32 s2, 0 -; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_dword v4, v[0:1], s[0:3], 0 addr64 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: s_mov_b32 s6, s10 -; SI-NEXT: s_mov_b32 s7, s11 -; SI-NEXT: s_movk_i32 s12, 0xff +; SI-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_movk_i32 s8, 0xff +; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 ; SI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 @@ -659,57 +658,58 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 ; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 +; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_and_b32_e32 v0, s12, v4 +; SI-NEXT: v_and_b32_e32 v0, s8, v4 ; SI-NEXT: v_add_i32_e32 v2, vcc, 9, v5 ; SI-NEXT: v_or_b32_e32 v0, v7, v0 ; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v6 -; SI-NEXT: v_and_b32_e32 v2, s12, v2 +; SI-NEXT: v_and_b32_e32 v2, s8, v2 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_or_b32_e32 v0, v1, v0 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 -; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: load_v4i8_to_v4f32_2_uses: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; VI-NEXT: 
v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_mov_b32 s11, 0xf000 -; VI-NEXT: s_mov_b32 s10, -1 -; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: v_mov_b32_e32 v5, 9 +; VI-NEXT: s_movk_i32 s8, 0x900 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 -; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v5, v[0:1] -; VI-NEXT: v_mov_b32_e32 v4, 9 -; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: s_movk_i32 s0, 0x900 +; VI-NEXT: flat_load_dword v4, v[0:1] +; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s6, s2 +; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v5 -; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v5 -; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v5 -; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v5 -; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v5 +; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 +; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 +; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 +; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v5 -; VI-NEXT: v_add_u16_e32 v8, 9, v5 -; VI-NEXT: v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v4 +; VI-NEXT: v_add_u16_e32 v8, 9, v4 +; VI-NEXT: v_add_u16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6 ; VI-NEXT: v_or_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_add_u16_e32 v0, s0, v0 +; 
VI-NEXT: v_mov_b32_e32 v2, s8 +; VI-NEXT: v_add_u16_e32 v0, s8, v0 ; VI-NEXT: v_add_u16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x @@ -733,29 +733,30 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_ubyte v2, v[0:1], s[0:3], 0 addr64 offset:5 -; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:6 -; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 -; SI-NEXT: buffer_load_ubyte v5, v[0:1], s[0:3], 0 addr64 offset:1 +; SI-NEXT: buffer_load_ubyte v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 ; SI-NEXT: buffer_load_ubyte v6, v[0:1], s[0:3], 0 addr64 offset:2 -; SI-NEXT: buffer_load_ubyte v7, v[0:1], s[0:3], 0 addr64 offset:3 -; SI-NEXT: buffer_load_ubyte v8, v[0:1], s[0:3], 0 addr64 offset:4 +; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:3 +; SI-NEXT: buffer_load_ubyte v7, v[0:1], s[0:3], 0 addr64 offset:4 +; SI-NEXT: buffer_load_ubyte v5, v[0:1], s[0:3], 0 addr64 offset:5 +; SI-NEXT: buffer_load_ubyte v8, v[0:1], s[0:3], 0 addr64 offset:6 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(4) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 +; SI-NEXT: s_waitcnt vmcnt(6) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v2 +; SI-NEXT: s_waitcnt vmcnt(5) +; SI-NEXT: v_cvt_f32_ubyte2_e32 v1, v3 ; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_cvt_f32_ubyte2_e32 v1, v5 -; SI-NEXT: v_cvt_f32_ubyte2_e32 v5, v2 -; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_lshlrev_b32_e32 v7, 8, v7 -; SI-NEXT: v_or_b32_e32 v2, v7, v6 -; SI-NEXT: s_waitcnt 
vmcnt(0) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v8 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v8, v3 +; SI-NEXT: v_lshlrev_b32_e32 v9, 8, v4 +; SI-NEXT: v_or_b32_e32 v2, v9, v6 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v7, v8 ; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v2 +; SI-NEXT: v_cvt_f32_ubyte2_e32 v5, v5 ; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v2 -; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:24 +; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:24 ; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir index 6194515c3bec4..274f8ddc89443 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir @@ -256,8 +256,8 @@ body: | # GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec # GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec # GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec -# GCN: %16:vgpr_32 = V_SUBREV_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec -# GCN: %19:vgpr_32 = V_ADD_I32_e32 5, %18, implicit-def $vcc, implicit $exec +# GCN: %16:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec +# GCN: %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec name: dpp_commute tracksRegLiveness: true body: | @@ -285,12 +285,12 @@ body: | %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec - %16:vgpr_32 = V_SUB_I32_e32 %1, %15, implicit-def $vcc, implicit $exec + %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec ; this cannot be combined because immediate as src0 isn't commutable %17:vgpr_32 = 
V_MOV_B32_e32 0, implicit $exec %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec - %19:vgpr_32 = V_ADD_I32_e32 5, %18, implicit-def $vcc, implicit $exec + %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec ... --- @@ -356,8 +356,8 @@ body: | # tests on sequences of dpp consumers # GCN-LABEL: name: dpp_seq -# GCN: %4:vgpr_32 = V_ADD_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec -# GCN: %5:vgpr_32 = V_SUBREV_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec +# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec +# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec # broken sequence: # GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec @@ -372,20 +372,20 @@ body: | %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec - %4:vgpr_32 = V_ADD_I32_e32 %3, %1, implicit-def $vcc, implicit $exec - %5:vgpr_32 = V_SUB_I32_e32 %1, %3, implicit-def $vcc, implicit $exec + %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec + %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec - %8:vgpr_32 = V_ADD_I32_e32 %7, %1, implicit-def $vcc, implicit $exec + %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec ; this breaks the sequence - %9:vgpr_32 = V_SUB_I32_e32 5, %7, implicit-def $vcc, implicit $exec + %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec ... 
# tests on sequences of dpp consumers followed by control flow # GCN-LABEL: name: dpp_seq_cf -# GCN: %4:vgpr_32 = V_ADD_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec -# GCN: %5:vgpr_32 = V_SUBREV_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec +# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec +# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec name: dpp_seq_cf @@ -399,8 +399,8 @@ body: | %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec - %4:vgpr_32 = V_ADD_I32_e32 %3, %1, implicit-def $vcc, implicit $exec - %5:vgpr_32 = V_SUB_I32_e32 %1, %3, implicit-def $vcc, implicit $exec + %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec + %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec %7:sreg_64 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec @@ -607,7 +607,7 @@ body: | # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec # GCN: %9:vgpr_32 = IMPLICIT_DEF # GCN: %8:vgpr_32 = IMPLICIT_DEF -# GCN: %6:vgpr_32 = V_ADD_I32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp_reg_sequence_both_combined tracksRegLiveness: true @@ -621,7 +621,7 @@ body: | %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 - %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, 
implicit-def $vcc, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec ... @@ -632,7 +632,7 @@ body: | # GCN: %8:vgpr_32 = IMPLICIT_DEF # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1 -# GCN: %6:vgpr_32 = V_ADD_I32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp_reg_sequence_first_combined tracksRegLiveness: true @@ -646,7 +646,7 @@ body: | %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 - %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec ... 
@@ -657,7 +657,7 @@ body: | # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec # GCN: %8:vgpr_32 = IMPLICIT_DEF # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1 -# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp_reg_sequence_second_combined tracksRegLiveness: true @@ -671,7 +671,7 @@ body: | %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 - %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec ... 
@@ -682,7 +682,7 @@ body: | # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 -# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp_reg_sequence_none_combined tracksRegLiveness: true @@ -696,7 +696,7 @@ body: | %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 - %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec ... @@ -709,7 +709,7 @@ body: | # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 # GCN: S_BRANCH %bb.1 # GCN: bb.1: -# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp_reg_sequence_exec_changed tracksRegLiveness: true @@ -726,7 +726,7 @@ body: | S_BRANCH %bb.1 bb.1: - %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec ... 
@@ -738,7 +738,7 @@ body: | # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1 -# GCN: %7:vgpr_32 = V_ADD_I32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec +# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec # GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp_reg_sequence_subreg tracksRegLiveness: true @@ -753,12 +753,12 @@ body: | %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1 - %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec + %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec ... # GCN-LABEL: name: dpp64_add64_impdef -# GCN: %3:vgpr_32 = V_ADD_I32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec +# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec # GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp64_add64_impdef tracksRegLiveness: true @@ -767,33 +767,33 @@ body: | %0:vreg_64 = IMPLICIT_DEF %1:vreg_64 = IMPLICIT_DEF %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec - %5:vgpr_32 = V_ADD_I32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec + %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec ... 
# GCN-LABEL: name: dpp64_add64_undef -# GCN: %3:vgpr_32 = V_ADD_I32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec +# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec # GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec name: dpp64_add64_undef tracksRegLiveness: true body: | bb.0: %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec - %5:vgpr_32 = V_ADD_I32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec + %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec ... # GCN-LABEL: name: dpp64_add64_first_combined # GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec # GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1 -# GCN: %3:vgpr_32 = V_ADD_I32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec +# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec # GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec name: dpp64_add64_first_combined tracksRegLiveness: true body: | bb.0: %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec - %4:vgpr_32 = V_ADD_I32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec + %4:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %2.sub1, undef 
$vcc, 0, implicit $exec ... @@ -848,7 +848,7 @@ body: | ... # GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence -# GCN: %5:vgpr_32 = V_ADD_I32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec +# GCN: %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec # GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec name: dont_combine_more_than_one_operand_dpp_reg_sequence tracksRegLiveness: true @@ -860,6 +860,6 @@ body: | %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 - %5:vgpr_32 = V_ADD_I32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec + %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir index 95a878c1997ff..3c0c5715420e9 100644 --- a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir +++ b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -298,7 +298,7 @@ body: | ... 
# GCN-LABEL: name: implicit_use_on_S_ENDPGM 0 -# GCN: V_ADD_I32 +# GCN: V_ADD_CO_U32 # GCN: COPY # GCN: V_ADDC_U32 # GCN: S_ENDPGM 0, implicit %3 @@ -307,7 +307,7 @@ tracksRegLiveness: true body: | bb.0: - dead %0:vgpr_32 = V_ADD_I32_e32 12345, undef %1:vgpr_32, implicit-def $vcc, implicit $exec + dead %0:vgpr_32 = V_ADD_CO_U32_e32 12345, undef %1:vgpr_32, implicit-def $vcc, implicit $exec %2:sreg_64_xexec = COPY $vcc %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 undef %5:vgpr_32, undef %6:vgpr_32, %2, 0, implicit $exec S_ENDPGM 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll index 3409cfef880f2..66cd1e81cdee4 100644 --- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll @@ -609,10 +609,10 @@ entry: ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] ; SI-DAG: v_cmp_nlt_f32_e32 vcc, v[[A_F32_1]], v[[B_F32_1]] -; VI-DAG: v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]] +; VI-DAG: v_cmp_nlt_f16_e32 vcc, v[[B_V2_F16]], v[[A_V2_F16]] ; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]] -; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16_1]], v[[B_F16_1]] +; VI: v_cmp_nlt_f16_e32 vcc, v[[B_F16_1]], v[[A_F16_1]] ; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]] ; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}} ; GCN: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir index 22775ec82714f..1315c227ecde3 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s # GCN-LABEL: name: fix-sgpr-copies -# GCN: V_ADD_I32_e32 +# GCN: V_ADD_CO_U32_e32 # GCN: V_ADDC_U32_e32 --- name: fix-sgpr-copies diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll 
b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll index b2e74d2819b58..4027312c69699 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -191,9 +191,9 @@ define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8* %fptr, i8 %x) #0 { ; CHECK-LABEL: {{^}}store_flat_i8_neg_offset: ; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} -; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9: v_add_co_u32_e64 v{{[0-9]+}}, vcc, -2, s ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, -; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4094{{$}} +; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 { %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2 store volatile i8 %x, i8* %fptr.offset @@ -220,9 +220,9 @@ define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8* %fptr) #0 { ; CHECK-LABEL: {{^}}load_flat_i8_neg_offset: ; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} -; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9: v_add_co_u32_e64 v{{[0-9]+}}, vcc, -2, s ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, -; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4094{{$}} +; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 { %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2 %val = load volatile i8, i8* %fptr.offset diff --git a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir index 3cc0f8d9a1d20..cfa623fa1ebf7 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir +++ b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir @@ -58,17 +58,17 @@ body: | %4 = S_LOAD_DWORDX2_IMM %1, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %7 = V_LSHLREV_B32_e32 2, %0, 
implicit $exec %2 = V_MOV_B32_e32 0, implicit $exec - undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec + undef %12.sub0 = V_ADD_CO_U32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec %11 = COPY %4.sub1 %12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1) - undef %9.sub0 = V_ADD_I32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec + undef %9.sub0 = V_ADD_CO_U32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec %8 = COPY %3.sub1 %9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - undef %13.sub0 = V_ADD_I32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec + undef %13.sub0 = V_ADD_CO_U32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec %13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34) - undef %10.sub0 = V_ADD_I32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec + undef %10.sub0 = V_ADD_CO_U32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec %10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec FLAT_STORE_DWORD %9, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2) FLAT_STORE_DWORD %10, %6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4) diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll index 1a11c5b6c9a96..7b38b79c78a15 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -17,8 +17,8 @@ ; FIXME: Should probably test this, but sometimes selecting fmac is painful to match. 
; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-FASTFMA-STRICT,GCN-DENORM-STRICT %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX1030 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GCN-DENORM-FASTFMA-STRICT,GFX1030 %s ; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow. 
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 108b5830f22a3..54ccc8fd870d4 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -191,7 +191,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* % ; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} ; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] -; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]] +; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]] ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]] ; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} @@ -1343,9 +1343,9 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] -; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} -; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]] -; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] +; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} +; GCN-SAFE-DAG: v_fma_f32 [[FMA:v[0-9]+]] +; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]] ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]] diff --git a/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir index d5058c026a10d..473193a2a3b4d 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir @@ -16,11 +16,11 @@ body: | ; GCN: liveins: $vgpr0 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit 
[[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec %1:vgpr_32 = COPY $vgpr0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -40,11 +40,11 @@ body: | ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -64,11 +64,11 @@ body: | ; GCN: liveins: $sgpr0 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec %1:sreg_32_xm0 = COPY $sgpr0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -88,11 +88,11 @@ body: | ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] %0:sreg_32_xm0 = COPY $sgpr0 %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -112,11 +112,11 @@ body: | ; GCN: liveins: $vgpr0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 %stack.0 %1:vgpr_32 = COPY $vgpr0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -136,11 +136,11 @@ body: | ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = COPY $vgpr0 %1:sreg_32_xm0 = S_MOV_B32 %stack.0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -157,11 +157,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -178,11 +178,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 16, [[V_MOV_B32_e32_]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, [[V_MOV_B32_e32_]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] %0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -199,11 +199,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -220,11 +220,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir index c026c5c0e6b5d..079147083863b 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir @@ -13,7 +13,7 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc ; GCN: S_ENDPGM 0, implicit [[COPY]] %0:sreg_32_xm0 = S_MOV_B32 12345 @@ -21,7 +21,7 @@ body: | %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF - %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %5 ... 
@@ -36,17 +36,17 @@ body: | ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec - ; GCN: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF1]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_1]], implicit [[V_ADD_I32_e64_2]] + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF1]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_1]], implicit [[V_ADD_CO_U32_e64_2]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF %4:vgpr_32 = IMPLICIT_DEF - %5:vgpr_32, %6:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec - %7:vgpr_32, %8:sreg_64_xexec = V_ADD_I32_e64 %0, %2, 0, implicit $exec + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec + %7:vgpr_32, %8:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %2, 0, implicit $exec S_ENDPGM 0, implicit %6, implicit %7 ... 
@@ -62,15 +62,15 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: DBG_VALUE %5:sreg_64_xexec, $noreg - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF - %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec DBG_VALUE %5, $noreg S_ENDPGM 0, implicit %4 @@ -90,7 +90,7 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] @@ -99,7 +99,7 @@ body: | %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF - %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, 0, implicit $exec S_ENDPGM 0, implicit %6 diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir 
b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir index 865c84ad8fce4..aec3f28f12932 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -11,11 +11,11 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -30,11 +30,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -48,11 +48,11 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -70,11 +70,11 @@ body: | ; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[DEF]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec %1:sreg_32_xm0 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -89,11 +89,11 @@ body: | ; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[DEF]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] %0:sreg_32_xm0 = IMPLICIT_DEF %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -109,12 +109,12 @@ body: | ; GCN: $vcc = S_MOV_B64 -1 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit $vcc $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2, implicit $vcc ... 
@@ -131,16 +131,16 @@ body: | ; GCN: $vcc = S_MOV_B64 -1 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: bb.1: ; GCN: liveins: $vcc - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit $vcc bb.0: successors: %bb.1 $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec bb.1: liveins: $vcc @@ -158,16 +158,16 @@ body: | ; GCN: successors: %bb.1(0x80000000) ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: bb.1: ; GCN: liveins: $vcc_lo - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_lo + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit $vcc_lo bb.0: successors: %bb.1 $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec bb.1: liveins: $vcc_lo @@ -190,8 +190,8 @@ body: | ; GCN: liveins: $vcc ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_lo + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit $vcc_lo bb.0: successors: %bb.1 $vcc = S_MOV_B64 -1 @@ -200,7 +200,7 @@ body: | liveins: $vcc %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2, implicit $vcc_lo ... @@ -219,10 +219,10 @@ body: | ; GCN: liveins: $vcc_hi ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: bb.2: ; GCN: liveins: $vcc_hi - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_hi + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit $vcc_hi bb.0: successors: %bb.1 $vcc_hi = S_MOV_B32 -1 @@ -231,7 +231,7 @@ body: | liveins: $vcc_hi %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec bb.2: liveins: $vcc_hi @@ -250,11 +250,11 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, 
implicit [[V_SUB_I32_e32_]] + ; GCN: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUB_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -269,11 +269,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]] + ; GCN: [[V_SUBREV_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_CO_U32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 - %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUB_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -288,11 +288,11 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]] + ; GCN: [[V_SUBREV_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUBREV_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -307,11 +307,11 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]] + ; GCN: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 - %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUBREV_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -329,9 +329,9 @@ body: | ; GCN: successors: %bb.1(0x80000000) ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: bb.1: - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] bb.0: successors: %bb.1 @@ -367,7 +367,7 @@ body: | S_NOP 0 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_NOP 0 S_NOP 0 @@ -390,16 +390,16 @@ body: | ; GCN: successors: %bb.1(0x80000000) ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: bb.1: - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] bb.0: successors: %bb.1 S_NOP 0, implicit-def $vcc %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_NOP 0 S_NOP 0 @@ -448,8 +448,8 @@ body: | ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + 
; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF DBG_VALUE $noreg, 0 @@ -480,7 +480,7 @@ body: | DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -497,7 +497,7 @@ body: | ; GCN-LABEL: name: vcc_liveness_dbg_value_search_after ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 @@ -526,7 +526,7 @@ body: | ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF S_NOP 0 @@ -557,7 +557,7 @@ body: | S_NOP 0 S_NOP 0 S_NOP 0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 @@ -601,11 +601,11 @@ body: | ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = COPY $vgpr0 
%1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec - %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, 0, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 killed %1, %0, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -619,11 +619,11 @@ body: | ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] + ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec - %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, 0, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %1, killed %0, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir b/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir index 9b6b086c5c42f..778b6fc1ae582 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -103,7 +103,7 @@ body: | %22:vgpr_32 = COPY %14.sub0 %23:sgpr_32 = COPY %4.sub1 %24:vgpr_32 = COPY %14.sub1 - %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21, %22, 0, implicit $exec + %17:vgpr_32, %19:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %22, 0, implicit $exec %25:vgpr_32 = COPY %23 %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 %25, %24, killed %19, 0, implicit $exec %16:vreg_64 = REG_SEQUENCE %17, %subreg.sub0, %18, %subreg.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr.ll b/llvm/test/CodeGen/AMDGPU/global-saddr.ll index 8a3d1d3053f9b..136cfd63686c8 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr.ll @@ -46,8 +46,8 @@ entry: ; Test various offset boundaries. 
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4088{{$}} +; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2040{{$}} ; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}} -; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2056{{$}} %gep11 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 511 %load11 = load i64, i64 addrspace(1)* %gep11 %gep12 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 1023 diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll index cf17589f135d2..ffa17c94a93ad 100644 --- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -289,35 +289,35 @@ bb3: ; preds = %bb3, %bb define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) { ; GFX9-LABEL: urem16_invariant_denom: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dword s3, s[0:1], 0x2c -; GFX9-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-NEXT: s_mov_b32 s4, 0xffff +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: s_movk_i32 s6, 0x400 +; GFX9-NEXT: s_movk_i32 s8, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_and_b32 s3, s2, s3 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX9-NEXT: s_and_b32 s5, s4, s2 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s5 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v0 ; GFX9-NEXT: BB5_1: ; %bb3 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_and_b32_e32 v2, s2, v4 +; GFX9-NEXT: v_and_b32_e32 v2, s4, v4 ; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v2 ; GFX9-NEXT: v_lshlrev_b64 v[5:6], 1, v[2:3] -; GFX9-NEXT: v_mov_b32_e32 v7, s5 -; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, 
v5 -; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1] -; GFX9-NEXT: v_mul_f32_e32 v7, v8, v1 -; GFX9-NEXT: v_trunc_f32_e32 v7, v7 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v7 -; GFX9-NEXT: v_mad_f32 v7, -v7, v0, v8 -; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v7|, v0 ; GFX9-NEXT: v_add_u16_e32 v4, 1, v4 -; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, v9, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v7, v7, s3 -; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s6, v4 +; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s8, v4 +; GFX9-NEXT: v_mul_f32_e32 v9, v8, v1 +; GFX9-NEXT: v_trunc_f32_e32 v9, v9 +; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v9 +; GFX9-NEXT: v_mad_f32 v8, -v9, v0, v8 +; GFX9-NEXT: v_cmp_ge_f32_e64 s[2:3], |v8|, v0 +; GFX9-NEXT: v_mov_b32_e32 v7, s7 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[2:3], 0, v10, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v8, v8, s5 +; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s6, v5 ; GFX9-NEXT: s_and_b64 vcc, exec, vcc -; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1] +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v8 ; GFX9-NEXT: global_store_short v[5:6], v2, off ; GFX9-NEXT: s_cbranch_vccz BB5_1 ; GFX9-NEXT: ; %bb.2: ; %bb2 @@ -398,38 +398,38 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a ; GFX9-LABEL: srem16_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: s_movk_i32 s3, 0x400 +; GFX9-NEXT: s_movk_i32 s5, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_sext_i32_i16 s2, s2 -; GFX9-NEXT: v_cvt_f32_i32_e32 v0, s2 +; GFX9-NEXT: s_sext_i32_i16 s4, s2 +; GFX9-NEXT: v_cvt_f32_i32_e32 v0, s4 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v0 ; GFX9-NEXT: BB7_1: ; %bb3 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_bfe_i32 v7, v4, 0, 16 -; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v4 ; GFX9-NEXT: 
v_cvt_f32_i32_e32 v10, v7 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v4 +; GFX9-NEXT: v_xor_b32_e32 v9, s4, v7 ; GFX9-NEXT: v_lshlrev_b64 v[5:6], 1, v[2:3] -; GFX9-NEXT: v_mov_b32_e32 v8, s5 -; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s4, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1] -; GFX9-NEXT: v_mul_f32_e32 v8, v10, v1 -; GFX9-NEXT: v_xor_b32_e32 v9, s2, v7 -; GFX9-NEXT: v_trunc_f32_e32 v8, v8 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 30, v9 -; GFX9-NEXT: v_cvt_i32_f32_e32 v9, v8 -; GFX9-NEXT: v_mad_f32 v8, -v8, v0, v10 +; GFX9-NEXT: v_mul_f32_e32 v9, v10, v1 +; GFX9-NEXT: v_trunc_f32_e32 v9, v9 +; GFX9-NEXT: v_cvt_i32_f32_e32 v11, v9 +; GFX9-NEXT: v_mad_f32 v9, -v9, v0, v10 ; GFX9-NEXT: v_or_b32_e32 v2, 1, v2 -; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v8|, |v0| -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; GFX9-NEXT: v_add_u32_e32 v2, v9, v2 -; GFX9-NEXT: v_mul_lo_u32 v2, v2, s2 +; GFX9-NEXT: v_cmp_ge_f32_e64 s[2:3], |v9|, |v0| +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[2:3] +; GFX9-NEXT: v_add_u32_e32 v2, v11, v2 +; GFX9-NEXT: v_mul_lo_u32 v2, v2, s4 ; GFX9-NEXT: v_add_u16_e32 v4, 1, v4 -; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s3, v4 +; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v4 +; GFX9-NEXT: v_mov_b32_e32 v8, s7 +; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], s6, v5 ; GFX9-NEXT: s_and_b64 vcc, exec, vcc ; GFX9-NEXT: v_sub_u32_e32 v2, v7, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1] ; GFX9-NEXT: global_store_short v[5:6], v2, off ; GFX9-NEXT: s_cbranch_vccz BB7_1 ; GFX9-NEXT: ; %bb.2: ; %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll index ff4c874ec7b79..0ec3dec2b8c7c 100644 --- a/llvm/test/CodeGen/AMDGPU/idot2.ll +++ b/llvm/test/CodeGen/AMDGPU/idot2.ll @@ -2591,20 +2591,20 @@ define amdgpu_kernel void @udot2_acc16(<2 x i16> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s2, 
s[4:5], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: s_mov_b32 s0, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s1, s0 -; GFX8-NEXT: s_lshr_b32 s1, s1, 16 -; GFX8-NEXT: s_and_b32 s0, s2, s0 +; GFX8-NEXT: s_and_b32 s3, s2, s0 ; GFX8-NEXT: s_lshr_b32 s2, s2, 16 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: s_and_b32 s0, s1, s0 +; GFX8-NEXT: s_lshr_b32 s1, s1, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 @@ -2615,20 +2615,20 @@ define amdgpu_kernel void @udot2_acc16(<2 x i16> addrspace(1)* %src1, ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 16 -; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s3, s2, s0 ; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: s_and_b32 s0, s1, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, 
v3, v2 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off @@ -2728,19 +2728,19 @@ define amdgpu_kernel void @notsdot2_sext8(<2 x i8> addrspace(1)* %src1, ; GFX8-NEXT: v_mov_b32_e32 v1, s5 ; GFX8-NEXT: v_mov_b32_e32 v2, s6 ; GFX8-NEXT: v_mov_b32_e32 v3, s7 -; GFX8-NEXT: flat_load_ushort v2, v[2:3] ; GFX8-NEXT: flat_load_ushort v0, v[0:1] +; GFX8-NEXT: flat_load_ushort v1, v[2:3] ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt vmcnt(1) lgkmcnt(0) -; GFX8-NEXT: v_bfe_i32 v3, v2, 0, 8 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, 8, v2 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_bfe_i32 v1, v0, 0, 8 +; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 8 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 8, v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_bfe_i32 v3, v1, 0, 8 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX8-NEXT: v_mad_i32_i24 v0, v2, v0, s2 -; GFX8-NEXT: v_mad_i32_i24 v2, v3, v1, v0 +; GFX8-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX8-NEXT: v_mad_i32_i24 v0, v1, v0, s2 +; GFX8-NEXT: v_mad_i32_i24 v2, v3, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -2755,20 +2755,20 @@ define amdgpu_kernel void @notsdot2_sext8(<2 x i8> addrspace(1)* %src1, ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NODL-NEXT: global_load_ushort v2, v[2:3], off ; GFX9-NODL-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NODL-NEXT: global_load_ushort v1, v[2:3], off ; GFX9-NODL-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) -; GFX9-NODL-NEXT: v_bfe_i32 v3, v2, 0, 8 -; GFX9-NODL-NEXT: v_lshrrev_b16_e32 v2, 8, v2 -; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_bfe_i32 v1, v0, 0, 8 +; GFX9-NODL-NEXT: v_bfe_i32 v2, v0, 0, 8 ; GFX9-NODL-NEXT: v_lshrrev_b16_e32 v0, 8, v0 
+; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) +; GFX9-NODL-NEXT: v_bfe_i32 v3, v1, 0, 8 +; GFX9-NODL-NEXT: v_lshrrev_b16_e32 v1, 8, v1 ; GFX9-NODL-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX9-NODL-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX9-NODL-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: v_mad_i32_i24 v0, v2, v0, s2 -; GFX9-NODL-NEXT: v_mad_i32_i24 v2, v3, v1, v0 +; GFX9-NODL-NEXT: v_mad_i32_i24 v0, v1, v0, s2 +; GFX9-NODL-NEXT: v_mad_i32_i24 v2, v3, v2, v0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off @@ -2783,20 +2783,20 @@ define amdgpu_kernel void @notsdot2_sext8(<2 x i8> addrspace(1)* %src1, ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-DL-NEXT: global_load_ushort v2, v[2:3], off ; GFX9-DL-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-DL-NEXT: global_load_ushort v1, v[2:3], off ; GFX9-DL-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) -; GFX9-DL-NEXT: v_bfe_i32 v3, v2, 0, 8 -; GFX9-DL-NEXT: v_lshrrev_b16_e32 v2, 8, v2 -; GFX9-DL-NEXT: s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_bfe_i32 v1, v0, 0, 8 +; GFX9-DL-NEXT: v_bfe_i32 v2, v0, 0, 8 ; GFX9-DL-NEXT: v_lshrrev_b16_e32 v0, 8, v0 +; GFX9-DL-NEXT: s_waitcnt vmcnt(0) +; GFX9-DL-NEXT: v_bfe_i32 v3, v1, 0, 8 +; GFX9-DL-NEXT: v_lshrrev_b16_e32 v1, 8, v1 ; GFX9-DL-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX9-DL-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX9-DL-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: v_mad_i32_i24 v0, v2, v0, s2 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, v3, v1, v0 +; GFX9-DL-NEXT: v_mad_i32_i24 v0, v1, v0, s2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, v3, v2, v0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll index 
0cce3a655ec73..629538ac1bc9d 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll @@ -221,28 +221,28 @@ define amdgpu_kernel void @idot4_acc16(<4 x i8> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_sext_i32_i8 s3, s2 -; GFX8-NEXT: s_bfe_i32 s5, s2, 0x80008 +; GFX8-NEXT: s_sext_i32_i8 s2, s0 +; GFX8-NEXT: s_sext_i32_i8 s3, s1 +; GFX8-NEXT: s_bfe_i32 s5, s1, 0x80008 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_i32 s7, s2, 0x80010 -; GFX8-NEXT: s_sext_i32_i8 s1, s0 +; GFX8-NEXT: s_bfe_i32 s7, s1, 0x80010 ; GFX8-NEXT: s_bfe_i32 s4, s0, 0x80008 ; GFX8-NEXT: v_mov_b32_e32 v4, s5 ; GFX8-NEXT: s_bfe_i32 s6, s0, 0x80010 -; GFX8-NEXT: s_ashr_i32 s2, s2, 24 +; GFX8-NEXT: s_ashr_i32 s1, s1, 24 ; GFX8-NEXT: v_mov_b32_e32 v5, s7 ; GFX8-NEXT: s_ashr_i32 s0, s0, 24 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_i32_i24 v2, s1, v3, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s2, v3, v2 ; GFX8-NEXT: v_mad_i32_i24 v2, s4, v4, v2 ; GFX8-NEXT: v_mad_i32_i24 v2, s6, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mad_i32_i24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm @@ -252,28 +252,28 @@ define amdgpu_kernel void @idot4_acc16(<4 x i8> addrspace(1)* %src1, ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off ; 
GFX9-NODL-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_sext_i32_i8 s3, s2 -; GFX9-NODL-NEXT: s_bfe_i32 s5, s2, 0x80008 +; GFX9-NODL-NEXT: s_sext_i32_i8 s2, s0 +; GFX9-NODL-NEXT: s_sext_i32_i8 s3, s1 +; GFX9-NODL-NEXT: s_bfe_i32 s5, s1, 0x80008 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_i32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: s_sext_i32_i8 s1, s0 +; GFX9-NODL-NEXT: s_bfe_i32 s7, s1, 0x80010 ; GFX9-NODL-NEXT: s_bfe_i32 s4, s0, 0x80008 ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-NODL-NEXT: s_bfe_i32 s6, s0, 0x80010 -; GFX9-NODL-NEXT: s_ashr_i32 s2, s2, 24 +; GFX9-NODL-NEXT: s_ashr_i32 s1, s1, 24 ; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s7 ; GFX9-NODL-NEXT: s_ashr_i32 s0, s0, 24 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s1, v3, v2 +; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s2, v3, v2 ; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s4, v4, v2 ; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s6, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm @@ -357,28 +357,28 @@ define amdgpu_kernel void @idot4_acc8(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s6, s[6:7], 0x0 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_movk_i32 s5, 0xff +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s6, s5 -; GFX7-NEXT: s_bfe_u32 s8, s6, 0x80008 -; GFX7-NEXT: s_and_b32 s5, s4, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: s_bfe_u32 s10, s6, 0x80010 +; GFX7-NEXT: s_and_b32 s7, s4, s8 +; GFX7-NEXT: s_and_b32 s6, 
s5, s8 +; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 ; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 ; GFX7-NEXT: v_mov_b32_e32 v2, s8 ; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s6, s6, 24 +; GFX7-NEXT: s_lshr_b32 s5, s5, 24 ; GFX7-NEXT: v_mov_b32_e32 v3, s10 ; GFX7-NEXT: s_lshr_b32 s4, s4, 24 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -388,30 +388,30 @@ define amdgpu_kernel void @idot4_acc8(<4 x i8> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: s_and_b32 s3, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX8-NEXT: s_and_b32 s3, s2, s0 +; GFX8-NEXT: s_bfe_u32 s4, s2, 0x80008 +; GFX8-NEXT: s_and_b32 s0, s1, s0 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX8-NEXT: s_bfe_u32 s6, s2, 0x80010 ; GFX8-NEXT: v_mov_b32_e32 v4, s4 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 +; GFX8-NEXT: s_bfe_u32 s7, s1, 0x80010 ; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, 
s6 +; GFX8-NEXT: s_lshr_b32 s1, s1, 24 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s5, v4, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -420,30 +420,30 @@ define amdgpu_kernel void @idot4_acc8(<4 x i8> addrspace(1)* %src1, ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_movk_i32 s0, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 -; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s3, s2, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s2, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s0, s1, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX9-NODL-NEXT: s_bfe_u32 s6, s2, 0x80010 ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s1, 0x80010 ; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v4, v2 ; 
GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll index 67936200396ab..aa4dc4e143d91 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -184,28 +184,28 @@ define amdgpu_kernel void @udot4_acc16(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s6, s[6:7], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_movk_i32 s5, 0xff +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s6, s5 -; GFX7-NEXT: s_bfe_u32 s8, s6, 0x80008 -; GFX7-NEXT: s_and_b32 s5, s4, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: s_bfe_u32 s10, s6, 0x80010 +; GFX7-NEXT: s_and_b32 s7, s4, s8 +; GFX7-NEXT: s_and_b32 s6, s5, s8 +; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 ; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 ; GFX7-NEXT: v_mov_b32_e32 v2, s8 ; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s6, s6, 24 +; GFX7-NEXT: s_lshr_b32 s5, s5, 24 ; GFX7-NEXT: v_mov_b32_e32 v3, s10 ; GFX7-NEXT: s_lshr_b32 s4, s4, 24 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 ; GFX7-NEXT: 
buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -215,20 +215,20 @@ define amdgpu_kernel void @udot4_acc16(<4 x i8> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] ; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v4, s5 ; GFX8-NEXT: s_and_b32 s3, s1, s0 ; GFX8-NEXT: s_and_b32 s0, s2, s0 +; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 ; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 ; GFX8-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX8-NEXT: v_mov_b32_e32 v4, s5 ; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 ; GFX8-NEXT: s_lshr_b32 s2, s2, 24 ; GFX8-NEXT: v_mov_b32_e32 v5, s7 @@ -247,20 +247,20 @@ define amdgpu_kernel void @udot4_acc16(<4 x i8> addrspace(1)* %src1, ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off ; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_movk_i32 s0, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 ; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 ; 
GFX9-NODL-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 ; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 ; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s7 @@ -354,28 +354,28 @@ define amdgpu_kernel void @udot4_acc8(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s6, s[6:7], 0x0 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_movk_i32 s5, 0xff +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s6, s5 -; GFX7-NEXT: s_bfe_u32 s8, s6, 0x80008 -; GFX7-NEXT: s_and_b32 s5, s4, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: s_bfe_u32 s10, s6, 0x80010 +; GFX7-NEXT: s_and_b32 s7, s4, s8 +; GFX7-NEXT: s_and_b32 s6, s5, s8 +; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 ; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 ; GFX7-NEXT: v_mov_b32_e32 v2, s8 ; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s6, s6, 24 +; GFX7-NEXT: s_lshr_b32 s5, s5, 24 ; GFX7-NEXT: v_mov_b32_e32 v3, s10 ; GFX7-NEXT: s_lshr_b32 s4, s4, 24 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -385,30 +385,30 @@ define amdgpu_kernel void @udot4_acc8(<4 x i8> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 
; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: s_and_b32 s3, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX8-NEXT: s_and_b32 s3, s2, s0 +; GFX8-NEXT: s_bfe_u32 s4, s2, 0x80008 +; GFX8-NEXT: s_and_b32 s0, s1, s0 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX8-NEXT: s_bfe_u32 s6, s2, 0x80010 ; GFX8-NEXT: v_mov_b32_e32 v4, s4 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 +; GFX8-NEXT: s_bfe_u32 s7, s1, 0x80010 ; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, s6 +; GFX8-NEXT: s_lshr_b32 s1, s1, 24 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s5, v4, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -417,30 +417,30 @@ define amdgpu_kernel void @udot4_acc8(<4 x i8> addrspace(1)* %src1, ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_movk_i32 s0, 0xff ; 
GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 -; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s3, s2, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s2, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s0, s1, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX9-NODL-NEXT: s_bfe_u32 s6, s2, 0x80010 ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s1, 0x80010 ; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v4, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1426,28 +1426,28 @@ define amdgpu_kernel void @notdot4_mixedtypes(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s8, 0xffff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s6, s[6:7], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i8 s7, s6 -; GFX7-NEXT: s_bfe_u32 s9, s6, 0x80008 -; GFX7-NEXT: s_sext_i32_i8 s5, s4 +; GFX7-NEXT: s_sext_i32_i8 s6, s4 +; GFX7-NEXT: s_sext_i32_i8 s7, s5 +; GFX7-NEXT: s_bfe_u32 s9, s5, 0x80008 ; GFX7-NEXT: s_and_b32 s7, s7, s8 ; GFX7-NEXT: s_bfe_u32 s10, s4, 0x80008 ; GFX7-NEXT: 
v_mov_b32_e32 v1, s9 -; GFX7-NEXT: s_bfe_u32 s11, s6, 0x80010 -; GFX7-NEXT: s_and_b32 s5, s5, s8 +; GFX7-NEXT: s_bfe_u32 s11, s5, 0x80010 +; GFX7-NEXT: s_and_b32 s6, s6, s8 ; GFX7-NEXT: v_mov_b32_e32 v3, s7 ; GFX7-NEXT: s_bfe_u32 s12, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s6, s6, 24 +; GFX7-NEXT: s_lshr_b32 s5, s5, 24 ; GFX7-NEXT: v_mov_b32_e32 v2, s11 ; GFX7-NEXT: s_lshr_b32 s4, s4, 24 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mad_u32_u24 v0, s10, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s12, v2, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -1457,28 +1457,28 @@ define amdgpu_kernel void @notdot4_mixedtypes(<4 x i8> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX8-NEXT: s_sext_i32_i8 s3, s2 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 ; GFX8-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX8-NEXT: s_sext_i32_i8 s1, s0 +; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX8-NEXT: s_sext_i32_i8 s3, s1 +; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: s_bfe_u32 s7, s1, 0x80010 +; GFX8-NEXT: s_sext_i32_i8 s2, s0 ; GFX8-NEXT: v_mov_b32_e32 v4, s3 ; GFX8-NEXT: s_bfe_u32 s6, s0, 0x80010 -; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: s_lshr_b32 s1, s1, 24 ; GFX8-NEXT: v_mov_b32_e32 v5, s7 ; GFX8-NEXT: s_lshr_b32 s0, s0, 24 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mad_u32_u24 v2, s4, v3, v2 -; GFX8-NEXT: 
v_mad_i32_i24 v2, s1, v4, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s2, v4, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s6, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm @@ -1488,28 +1488,28 @@ define amdgpu_kernel void @notdot4_mixedtypes(<4 x i8> addrspace(1)* %src1, ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off ; GFX9-NODL-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-NODL-NEXT: s_sext_i32_i8 s3, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 ; GFX9-NODL-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX9-NODL-NEXT: s_sext_i32_i8 s1, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX9-NODL-NEXT: s_sext_i32_i8 s3, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s1, 0x80010 +; GFX9-NODL-NEXT: s_sext_i32_i8 s2, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s3 ; GFX9-NODL-NEXT: s_bfe_u32 s6, s0, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 ; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s7 ; GFX9-NODL-NEXT: s_lshr_b32 s0, s0, 24 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v3, v2 -; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s1, v4, v2 +; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s2, v4, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s6, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm @@ 
-1519,28 +1519,28 @@ define amdgpu_kernel void @notdot4_mixedtypes(<4 x i8> addrspace(1)* %src1, ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ushort v2, v[0:1], off ; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-DL-NEXT: s_sext_i32_i8 s3, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-DL-NEXT: s_bfe_u32 s7, s2, 0x80010 ; GFX9-DL-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX9-DL-NEXT: s_sext_i32_i8 s1, s0 +; GFX9-DL-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX9-DL-NEXT: s_sext_i32_i8 s3, s1 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-DL-NEXT: s_bfe_u32 s7, s1, 0x80010 +; GFX9-DL-NEXT: s_sext_i32_i8 s2, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v4, s3 ; GFX9-DL-NEXT: s_bfe_u32 s6, s0, 0x80010 -; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-DL-NEXT: s_lshr_b32 s1, s1, 24 ; GFX9-DL-NEXT: v_mov_b32_e32 v5, s7 ; GFX9-DL-NEXT: s_lshr_b32 s0, s0, 24 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v3, v2 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s1, v4, v2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s2, v4, v2 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s6, v5, v2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-DL-NEXT: global_store_short v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm @@ -1809,29 +1809,29 @@ define amdgpu_kernel void @udot4_acc16_vecMul(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s6, s[6:7], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 
; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_movk_i32 s7, 0xff +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_u32 s10, s6, 0x80008 -; GFX7-NEXT: s_bfe_u32 s12, s6, 0x80010 -; GFX7-NEXT: s_lshr_b32 s9, s6, 24 -; GFX7-NEXT: s_and_b32 s6, s6, s7 -; GFX7-NEXT: s_lshr_b32 s5, s4, 24 -; GFX7-NEXT: s_bfe_u32 s8, s4, 0x80008 +; GFX7-NEXT: s_lshr_b32 s6, s4, 24 +; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80008 +; GFX7-NEXT: s_bfe_u32 s12, s5, 0x80010 +; GFX7-NEXT: s_lshr_b32 s9, s5, 24 +; GFX7-NEXT: s_and_b32 s5, s5, s8 +; GFX7-NEXT: s_bfe_u32 s7, s4, 0x80008 ; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_and_b32 s4, s4, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: s_and_b32 s4, s4, s8 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v2, s10 ; GFX7-NEXT: v_mov_b32_e32 v3, s12 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v2, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll index 20df4fff7f4e6..9a88c82b5a85e 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -657,43 +657,43 @@ define amdgpu_kernel void @idot8_acc8(<8 x i4> addrspace(1)* %src1, ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 +; 
GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_i32 s6, s3, 0x40000 -; GFX8-NEXT: s_lshr_b32 s4, s3, 12 -; GFX8-NEXT: s_bfe_i32 s8, s3, 0x40004 -; GFX8-NEXT: s_bfe_i32 s10, s3, 0x40008 -; GFX8-NEXT: s_lshr_b32 s1, s0, 12 -; GFX8-NEXT: s_bfe_i32 s5, s0, 0x40000 +; GFX8-NEXT: s_lshr_b32 s3, s1, 12 +; GFX8-NEXT: s_bfe_i32 s6, s2, 0x40000 +; GFX8-NEXT: s_lshr_b32 s4, s2, 12 +; GFX8-NEXT: s_bfe_i32 s8, s2, 0x40004 +; GFX8-NEXT: s_bfe_i32 s10, s2, 0x40008 +; GFX8-NEXT: s_bfe_i32 s5, s1, 0x40000 ; GFX8-NEXT: v_mov_b32_e32 v6, s6 -; GFX8-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX8-NEXT: v_lshlrev_b16_e64 v4, 12, s3 ; GFX8-NEXT: v_lshlrev_b16_e64 v5, 12, s4 -; GFX8-NEXT: s_bfe_i32 s7, s0, 0x40004 -; GFX8-NEXT: s_bfe_i32 s9, s0, 0x40008 +; GFX8-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX8-NEXT: s_bfe_i32 s9, s1, 0x40008 ; GFX8-NEXT: v_mov_b32_e32 v3, s10 ; GFX8-NEXT: v_mov_b32_e32 v7, s8 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 12, v4 ; GFX8-NEXT: v_ashrrev_i16_e32 v5, 12, v5 ; GFX8-NEXT: v_mul_i32_i24_e32 v3, s9, v3 -; GFX8-NEXT: s_bfe_i32 s12, s3, 0x40010 -; GFX8-NEXT: v_and_b32_e32 v4, s2, v4 -; GFX8-NEXT: v_and_b32_e32 v5, s2, v5 -; GFX8-NEXT: s_bfe_i32 s14, s3, 0x40014 -; GFX8-NEXT: s_bfe_i32 s11, s0, 0x40010 +; GFX8-NEXT: s_bfe_i32 s12, s2, 0x40010 +; GFX8-NEXT: v_and_b32_e32 v4, s0, v4 +; GFX8-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX8-NEXT: s_bfe_i32 s14, s2, 0x40014 +; GFX8-NEXT: s_bfe_i32 s11, s1, 0x40010 ; GFX8-NEXT: v_mov_b32_e32 v8, s12 -; GFX8-NEXT: s_bfe_i32 s16, s3, 0x40018 -; GFX8-NEXT: s_bfe_i32 s13, s0, 0x40014 +; GFX8-NEXT: s_bfe_i32 s16, s2, 0x40018 +; GFX8-NEXT: s_bfe_i32 s13, s1, 0x40014 ; GFX8-NEXT: v_mov_b32_e32 v9, s14 -; GFX8-NEXT: s_bfe_i32 s15, s0, 0x40018 -; GFX8-NEXT: s_ashr_i32 s3, s3, 28 +; GFX8-NEXT: s_bfe_i32 s15, s1, 0x40018 +; GFX8-NEXT: s_ashr_i32 s2, s2, 28 ; GFX8-NEXT: v_mov_b32_e32 v10, s16 -; GFX8-NEXT: s_ashr_i32 s0, s0, 28 +; GFX8-NEXT: s_ashr_i32 s1, s1, 28 ; 
GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mad_i32_i24 v2, s5, v6, v2 ; GFX8-NEXT: v_mad_i32_i24 v2, s7, v7, v2 @@ -702,8 +702,8 @@ define amdgpu_kernel void @idot8_acc8(<8 x i4> addrspace(1)* %src1, ; GFX8-NEXT: v_mad_i32_i24 v2, s11, v8, v2 ; GFX8-NEXT: v_mad_i32_i24 v2, s13, v9, v2 ; GFX8-NEXT: v_mad_i32_i24 v2, s15, v10, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mad_i32_i24 v2, s1, v3, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -711,43 +711,43 @@ define amdgpu_kernel void @idot8_acc8(<8 x i4> addrspace(1)* %src1, ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NEXT: s_movk_i32 s2, 0xff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-NEXT: s_movk_i32 s0, 0xff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_bfe_i32 s6, s3, 0x40000 -; GFX9-NEXT: s_lshr_b32 s4, s3, 12 -; GFX9-NEXT: s_bfe_i32 s8, s3, 0x40004 -; GFX9-NEXT: s_bfe_i32 s10, s3, 0x40008 -; GFX9-NEXT: s_lshr_b32 s1, s0, 12 -; GFX9-NEXT: s_bfe_i32 s5, s0, 0x40000 +; GFX9-NEXT: s_lshr_b32 s3, s1, 12 +; GFX9-NEXT: s_bfe_i32 s6, s2, 0x40000 +; GFX9-NEXT: s_lshr_b32 s4, s2, 12 +; GFX9-NEXT: s_bfe_i32 s8, s2, 0x40004 +; GFX9-NEXT: s_bfe_i32 s10, s2, 0x40008 +; GFX9-NEXT: s_bfe_i32 s5, s1, 0x40000 ; GFX9-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX9-NEXT: v_lshlrev_b16_e64 v4, 12, s3 ; GFX9-NEXT: v_lshlrev_b16_e64 v5, 12, s4 -; GFX9-NEXT: s_bfe_i32 s7, s0, 0x40004 -; GFX9-NEXT: s_bfe_i32 s9, s0, 0x40008 +; GFX9-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX9-NEXT: s_bfe_i32 s9, s1, 0x40008 ; GFX9-NEXT: 
v_mov_b32_e32 v3, s10 ; GFX9-NEXT: v_mov_b32_e32 v7, s8 ; GFX9-NEXT: v_ashrrev_i16_e32 v4, 12, v4 ; GFX9-NEXT: v_ashrrev_i16_e32 v5, 12, v5 ; GFX9-NEXT: v_mul_i32_i24_e32 v3, s9, v3 -; GFX9-NEXT: s_bfe_i32 s12, s3, 0x40010 -; GFX9-NEXT: v_and_b32_e32 v4, s2, v4 -; GFX9-NEXT: v_and_b32_e32 v5, s2, v5 -; GFX9-NEXT: s_bfe_i32 s14, s3, 0x40014 -; GFX9-NEXT: s_bfe_i32 s11, s0, 0x40010 +; GFX9-NEXT: s_bfe_i32 s12, s2, 0x40010 +; GFX9-NEXT: v_and_b32_e32 v4, s0, v4 +; GFX9-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX9-NEXT: s_bfe_i32 s14, s2, 0x40014 +; GFX9-NEXT: s_bfe_i32 s11, s1, 0x40010 ; GFX9-NEXT: v_mov_b32_e32 v8, s12 -; GFX9-NEXT: s_bfe_i32 s16, s3, 0x40018 -; GFX9-NEXT: s_bfe_i32 s13, s0, 0x40014 +; GFX9-NEXT: s_bfe_i32 s16, s2, 0x40018 +; GFX9-NEXT: s_bfe_i32 s13, s1, 0x40014 ; GFX9-NEXT: v_mov_b32_e32 v9, s14 -; GFX9-NEXT: s_bfe_i32 s15, s0, 0x40018 -; GFX9-NEXT: s_ashr_i32 s3, s3, 28 +; GFX9-NEXT: s_bfe_i32 s15, s1, 0x40018 +; GFX9-NEXT: s_ashr_i32 s2, s2, 28 ; GFX9-NEXT: v_mov_b32_e32 v10, s16 -; GFX9-NEXT: s_ashr_i32 s0, s0, 28 +; GFX9-NEXT: s_ashr_i32 s1, s1, 28 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mad_i32_i24 v2, s5, v6, v2 ; GFX9-NEXT: v_mad_i32_i24 v2, s7, v7, v2 @@ -756,8 +756,8 @@ define amdgpu_kernel void @idot8_acc8(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: v_mad_i32_i24 v2, s11, v8, v2 ; GFX9-NEXT: v_mad_i32_i24 v2, s13, v9, v2 ; GFX9-NEXT: v_mad_i32_i24 v2, s15, v10, v2 -; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_mad_i32_i24 v2, s1, v3, v2 ; GFX9-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NEXT: s_endpgm ; @@ -765,43 +765,43 @@ define amdgpu_kernel void @idot8_acc8(<8 x i4> addrspace(1)* %src1, ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_movk_i32 s2, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-DL-NEXT: 
v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-DL-NEXT: s_movk_i32 s0, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_bfe_i32 s6, s3, 0x40000 -; GFX9-DL-NEXT: s_lshr_b32 s4, s3, 12 -; GFX9-DL-NEXT: s_bfe_i32 s8, s3, 0x40004 -; GFX9-DL-NEXT: s_bfe_i32 s10, s3, 0x40008 -; GFX9-DL-NEXT: s_lshr_b32 s1, s0, 12 -; GFX9-DL-NEXT: s_bfe_i32 s5, s0, 0x40000 +; GFX9-DL-NEXT: s_lshr_b32 s3, s1, 12 +; GFX9-DL-NEXT: s_bfe_i32 s6, s2, 0x40000 +; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 12 +; GFX9-DL-NEXT: s_bfe_i32 s8, s2, 0x40004 +; GFX9-DL-NEXT: s_bfe_i32 s10, s2, 0x40008 +; GFX9-DL-NEXT: s_bfe_i32 s5, s1, 0x40000 ; GFX9-DL-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v4, 12, s3 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v5, 12, s4 -; GFX9-DL-NEXT: s_bfe_i32 s7, s0, 0x40004 -; GFX9-DL-NEXT: s_bfe_i32 s9, s0, 0x40008 +; GFX9-DL-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX9-DL-NEXT: s_bfe_i32 s9, s1, 0x40008 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s10 ; GFX9-DL-NEXT: v_mov_b32_e32 v7, s8 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v4, 12, v4 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v5, 12, v5 ; GFX9-DL-NEXT: v_mul_i32_i24_e32 v3, s9, v3 -; GFX9-DL-NEXT: s_bfe_i32 s12, s3, 0x40010 -; GFX9-DL-NEXT: v_and_b32_e32 v4, s2, v4 -; GFX9-DL-NEXT: v_and_b32_e32 v5, s2, v5 -; GFX9-DL-NEXT: s_bfe_i32 s14, s3, 0x40014 -; GFX9-DL-NEXT: s_bfe_i32 s11, s0, 0x40010 +; GFX9-DL-NEXT: s_bfe_i32 s12, s2, 0x40010 +; GFX9-DL-NEXT: v_and_b32_e32 v4, s0, v4 +; GFX9-DL-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX9-DL-NEXT: s_bfe_i32 s14, s2, 0x40014 +; GFX9-DL-NEXT: s_bfe_i32 s11, s1, 0x40010 ; GFX9-DL-NEXT: v_mov_b32_e32 v8, s12 -; GFX9-DL-NEXT: s_bfe_i32 s16, s3, 0x40018 -; GFX9-DL-NEXT: s_bfe_i32 s13, s0, 0x40014 +; GFX9-DL-NEXT: s_bfe_i32 s16, s2, 0x40018 +; GFX9-DL-NEXT: 
s_bfe_i32 s13, s1, 0x40014 ; GFX9-DL-NEXT: v_mov_b32_e32 v9, s14 -; GFX9-DL-NEXT: s_bfe_i32 s15, s0, 0x40018 -; GFX9-DL-NEXT: s_ashr_i32 s3, s3, 28 +; GFX9-DL-NEXT: s_bfe_i32 s15, s1, 0x40018 +; GFX9-DL-NEXT: s_ashr_i32 s2, s2, 28 ; GFX9-DL-NEXT: v_mov_b32_e32 v10, s16 -; GFX9-DL-NEXT: s_ashr_i32 s0, s0, 28 +; GFX9-DL-NEXT: s_ashr_i32 s1, s1, 28 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) ; GFX9-DL-NEXT: v_mad_i32_i24 v2, s5, v6, v2 ; GFX9-DL-NEXT: v_mad_i32_i24 v2, s7, v7, v2 @@ -810,8 +810,8 @@ define amdgpu_kernel void @idot8_acc8(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: v_mad_i32_i24 v2, s11, v8, v2 ; GFX9-DL-NEXT: v_mad_i32_i24 v2, s13, v9, v2 ; GFX9-DL-NEXT: v_mad_i32_i24 v2, s15, v10, v2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s1, v3, v2 ; GFX9-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm ; @@ -1462,19 +1462,19 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s8, 0xffff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s6, s[6:7], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_i32 s15, s6, 0x40018 -; GFX7-NEXT: s_bfe_i32 s16, s6, 0x40014 -; GFX7-NEXT: s_bfe_i32 s17, s6, 0x40010 -; GFX7-NEXT: s_bfe_i32 s18, s6, 0x40000 -; GFX7-NEXT: s_bfe_i32 s19, s6, 0x40004 -; GFX7-NEXT: s_bfe_i32 s20, s6, 0x40008 -; GFX7-NEXT: s_ashr_i32 s14, s6, 28 -; GFX7-NEXT: s_bfe_i32 s6, s6, 0x4000c -; GFX7-NEXT: s_ashr_i32 s5, s4, 28 +; GFX7-NEXT: s_ashr_i32 s6, s4, 28 +; GFX7-NEXT: s_bfe_i32 s15, s5, 0x40018 +; GFX7-NEXT: s_bfe_i32 s16, s5, 0x40014 +; GFX7-NEXT: s_bfe_i32 s17, s5, 0x40010 +; GFX7-NEXT: s_bfe_i32 s18, s5, 0x40000 +; GFX7-NEXT: s_bfe_i32 s19, s5, 0x40004 +; GFX7-NEXT: s_bfe_i32 s20, s5, 0x40008 +; 
GFX7-NEXT: s_ashr_i32 s14, s5, 28 +; GFX7-NEXT: s_bfe_i32 s5, s5, 0x4000c ; GFX7-NEXT: s_bfe_i32 s7, s4, 0x40018 ; GFX7-NEXT: s_bfe_i32 s9, s4, 0x40014 ; GFX7-NEXT: s_bfe_i32 s10, s4, 0x40010 @@ -1485,7 +1485,7 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX7-NEXT: s_bfe_i32 s13, s4, 0x40008 ; GFX7-NEXT: v_mov_b32_e32 v2, s20 ; GFX7-NEXT: s_bfe_i32 s4, s4, 0x4000c -; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mul_i32_i24_e32 v1, s4, v1 ; GFX7-NEXT: v_mul_i32_i24_e32 v2, s13, v2 ; GFX7-NEXT: v_mul_i32_i24_e32 v3, s12, v3 @@ -1510,7 +1510,7 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX7-NEXT: v_mad_i32_i24 v0, s9, v6, v0 ; GFX7-NEXT: v_mad_i32_i24 v0, s7, v7, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s14 -; GFX7-NEXT: v_mad_i32_i24 v0, s5, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1954,24 +1954,24 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s2, 0xffff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-NEXT: s_mov_b32 s0, 0xffff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s7, s0, 4 -; GFX9-NEXT: s_lshr_b32 s14, s1, 4 -; GFX9-NEXT: v_lshlrev_b16_e64 v3, 12, s0 -; GFX9-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX9-NEXT: s_lshr_b32 s7, s1, 4 +; GFX9-NEXT: s_lshr_b32 s14, s2, 4 +; GFX9-NEXT: v_lshlrev_b16_e64 v3, 12, s1 +; GFX9-NEXT: v_lshlrev_b16_e64 v4, 12, s2 ; GFX9-NEXT: v_lshlrev_b16_e64 v7, 12, s7 ; GFX9-NEXT: 
v_lshlrev_b16_e64 v14, 12, s14 -; GFX9-NEXT: s_lshr_b32 s8, s0, 12 -; GFX9-NEXT: s_lshr_b32 s9, s0, 8 -; GFX9-NEXT: s_lshr_b32 s15, s1, 12 -; GFX9-NEXT: s_lshr_b32 s16, s1, 8 +; GFX9-NEXT: s_lshr_b32 s8, s1, 12 +; GFX9-NEXT: s_lshr_b32 s9, s1, 8 +; GFX9-NEXT: s_lshr_b32 s15, s2, 12 +; GFX9-NEXT: s_lshr_b32 s16, s2, 8 ; GFX9-NEXT: v_lshlrev_b16_e64 v5, 12, s9 ; GFX9-NEXT: v_lshlrev_b16_e64 v6, 12, s8 ; GFX9-NEXT: v_lshlrev_b16_e64 v12, 12, s16 @@ -1987,21 +1987,21 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: v_mul_lo_u16_e32 v3, v3, v4 ; GFX9-NEXT: v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_lshr_b32 s3, s0, 20 -; GFX9-NEXT: s_lshr_b32 s4, s0, 16 -; GFX9-NEXT: s_lshr_b32 s10, s1, 20 -; GFX9-NEXT: s_lshr_b32 s11, s1, 16 +; GFX9-NEXT: s_lshr_b32 s3, s1, 20 +; GFX9-NEXT: s_lshr_b32 s4, s1, 16 +; GFX9-NEXT: s_lshr_b32 s10, s2, 20 +; GFX9-NEXT: s_lshr_b32 s11, s2, 16 ; GFX9-NEXT: v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_mul_lo_u16_e32 v5, v5, v12 ; GFX9-NEXT: v_lshlrev_b16_e64 v10, 12, s4 ; GFX9-NEXT: v_lshlrev_b16_e64 v11, 12, s3 ; GFX9-NEXT: v_lshlrev_b16_e64 v17, 12, s11 ; GFX9-NEXT: v_lshlrev_b16_e64 v18, 12, s10 -; GFX9-NEXT: s_lshr_b32 s5, s0, 28 -; GFX9-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-NEXT: s_lshr_b32 s12, s1, 28 -; GFX9-NEXT: s_lshr_b32 s13, s1, 24 -; GFX9-NEXT: v_and_b32_e32 v3, s2, v3 +; GFX9-NEXT: s_lshr_b32 s5, s1, 28 +; GFX9-NEXT: s_lshr_b32 s6, s1, 24 +; GFX9-NEXT: s_lshr_b32 s12, s2, 28 +; GFX9-NEXT: s_lshr_b32 s13, s2, 24 +; GFX9-NEXT: v_and_b32_e32 v3, s0, v3 ; GFX9-NEXT: v_or_b32_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b16_e64 v8, 12, s6 ; GFX9-NEXT: v_lshlrev_b16_e64 v9, 12, s5 @@ -2023,7 +2023,7 
@@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_mul_lo_u16_e32 v8, v8, v15 ; GFX9-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_e32 v4, s2, v4 +; GFX9-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v8 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 @@ -2042,24 +2042,24 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_lshr_b32 s7, s0, 4 -; GFX9-DL-NEXT: s_lshr_b32 s14, s1, 4 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v3, 12, s0 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX9-DL-NEXT: s_lshr_b32 s7, s1, 4 +; GFX9-DL-NEXT: s_lshr_b32 s14, s2, 4 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v3, 12, s1 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v4, 12, s2 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v7, 12, s7 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v14, 12, s14 -; GFX9-DL-NEXT: s_lshr_b32 s8, s0, 12 -; GFX9-DL-NEXT: s_lshr_b32 s9, s0, 8 -; GFX9-DL-NEXT: s_lshr_b32 s15, s1, 12 -; GFX9-DL-NEXT: s_lshr_b32 s16, s1, 8 +; GFX9-DL-NEXT: s_lshr_b32 s8, s1, 12 +; GFX9-DL-NEXT: s_lshr_b32 s9, s1, 8 +; GFX9-DL-NEXT: s_lshr_b32 s15, s2, 12 +; GFX9-DL-NEXT: s_lshr_b32 s16, s2, 8 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v5, 12, s9 ; 
GFX9-DL-NEXT: v_lshlrev_b16_e64 v6, 12, s8 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v12, 12, s16 @@ -2075,21 +2075,21 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v3, v3, v4 ; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-DL-NEXT: v_or_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-DL-NEXT: s_lshr_b32 s3, s0, 20 -; GFX9-DL-NEXT: s_lshr_b32 s4, s0, 16 -; GFX9-DL-NEXT: s_lshr_b32 s10, s1, 20 -; GFX9-DL-NEXT: s_lshr_b32 s11, s1, 16 +; GFX9-DL-NEXT: s_lshr_b32 s3, s1, 20 +; GFX9-DL-NEXT: s_lshr_b32 s4, s1, 16 +; GFX9-DL-NEXT: s_lshr_b32 s10, s2, 20 +; GFX9-DL-NEXT: s_lshr_b32 s11, s2, 16 ; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v5, v5, v12 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v10, 12, s4 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v11, 12, s3 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v17, 12, s11 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v18, 12, s10 -; GFX9-DL-NEXT: s_lshr_b32 s5, s0, 28 -; GFX9-DL-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-DL-NEXT: s_lshr_b32 s12, s1, 28 -; GFX9-DL-NEXT: s_lshr_b32 s13, s1, 24 -; GFX9-DL-NEXT: v_and_b32_e32 v3, s2, v3 +; GFX9-DL-NEXT: s_lshr_b32 s5, s1, 28 +; GFX9-DL-NEXT: s_lshr_b32 s6, s1, 24 +; GFX9-DL-NEXT: s_lshr_b32 s12, s2, 28 +; GFX9-DL-NEXT: s_lshr_b32 s13, s2, 24 +; GFX9-DL-NEXT: v_and_b32_e32 v3, s0, v3 ; GFX9-DL-NEXT: v_or_b32_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v8, 12, s6 ; GFX9-DL-NEXT: v_lshlrev_b16_e64 v9, 12, s5 @@ -2111,7 +2111,7 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v8, v8, v15 ; GFX9-DL-NEXT: v_or_b32_sdwa 
v8, v8, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-DL-NEXT: v_and_b32_e32 v4, s2, v4 +; GFX9-DL-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX9-DL-NEXT: v_or_b32_e32 v6, v4, v8 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) ; GFX9-DL-NEXT: v_add_u32_e32 v2, v3, v2 diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll index 31d8eb0ec8b64..171cb6ac6ea77 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll @@ -2426,38 +2426,38 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s2, 0xffff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-NEXT: s_mov_b32 s0, 0xffff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_bfe_u32 s3, s0, 0x40010 -; GFX9-NEXT: s_bfe_u32 s10, s1, 0x40010 -; GFX9-NEXT: s_bfe_u32 s11, s1, 0x40014 -; GFX9-NEXT: s_bfe_u32 s12, s1, 0x40018 -; GFX9-NEXT: s_lshr_b32 s13, s1, 28 -; GFX9-NEXT: s_and_b32 s14, s1, 15 -; GFX9-NEXT: s_bfe_u32 s15, s1, 0x40004 -; GFX9-NEXT: s_bfe_u32 s16, s1, 0x40008 +; GFX9-NEXT: s_bfe_u32 s3, s1, 0x40010 +; GFX9-NEXT: s_bfe_u32 s10, s2, 0x40010 +; GFX9-NEXT: s_bfe_u32 s11, s2, 0x40014 +; GFX9-NEXT: s_bfe_u32 s12, s2, 0x40018 +; GFX9-NEXT: s_lshr_b32 s13, s2, 28 +; GFX9-NEXT: s_and_b32 s14, s2, 15 +; GFX9-NEXT: s_bfe_u32 s15, s2, 0x40004 +; GFX9-NEXT: s_bfe_u32 s16, s2, 0x40008 ; GFX9-NEXT: v_mov_b32_e32 v3, s10 -; GFX9-NEXT: s_bfe_u32 s1, s1, 0x4000c -; GFX9-NEXT: s_bfe_u32 s4, s0, 0x40014 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x4000c +; GFX9-NEXT: s_bfe_u32 s4, s1, 0x40014 ; GFX9-NEXT: v_mov_b32_e32 v4, 
s11 -; GFX9-NEXT: s_bfe_u32 s5, s0, 0x40018 +; GFX9-NEXT: s_bfe_u32 s5, s1, 0x40018 ; GFX9-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-NEXT: s_lshr_b32 s6, s0, 28 +; GFX9-NEXT: s_lshr_b32 s6, s1, 28 ; GFX9-NEXT: v_mov_b32_e32 v6, s13 -; GFX9-NEXT: s_and_b32 s7, s0, 15 +; GFX9-NEXT: s_and_b32 s7, s1, 15 ; GFX9-NEXT: v_mov_b32_e32 v7, s14 -; GFX9-NEXT: s_bfe_u32 s8, s0, 0x40004 +; GFX9-NEXT: s_bfe_u32 s8, s1, 0x40004 ; GFX9-NEXT: v_mov_b32_e32 v8, s15 -; GFX9-NEXT: s_bfe_u32 s9, s0, 0x40008 +; GFX9-NEXT: s_bfe_u32 s9, s1, 0x40008 ; GFX9-NEXT: v_mov_b32_e32 v9, s16 -; GFX9-NEXT: s_bfe_u32 s0, s0, 0x4000c -; GFX9-NEXT: v_mov_b32_e32 v10, s1 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x4000c +; GFX9-NEXT: v_mov_b32_e32 v10, s2 ; GFX9-NEXT: v_mul_lo_u16_e32 v3, s3, v3 ; GFX9-NEXT: v_mul_lo_u16_sdwa v4, s4, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_mul_lo_u16_e32 v5, s5, v5 @@ -2468,12 +2468,12 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: v_or_b32_sdwa v4, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_or_b32_e32 v5, v7, v8 ; GFX9-NEXT: v_mul_lo_u16_e32 v9, s9, v9 -; GFX9-NEXT: v_mul_lo_u16_sdwa v10, s0, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_and_b32_e32 v5, s2, v5 +; GFX9-NEXT: v_mul_lo_u16_sdwa v10, s1, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_and_b32_e32 v5, s0, v5 ; GFX9-NEXT: v_or_b32_sdwa v6, v9, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_or_b32_e32 v6, v5, v6 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 8, v6 -; GFX9-NEXT: v_and_b32_e32 v3, s2, v3 +; GFX9-NEXT: v_and_b32_e32 v3, s0, v3 ; GFX9-NEXT: v_or_b32_e32 v4, v3, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 @@ -2492,38 +2492,38 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL: ; %bb.0: ; %entry ; 
GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_bfe_u32 s3, s0, 0x40010 -; GFX9-DL-NEXT: s_bfe_u32 s10, s1, 0x40010 -; GFX9-DL-NEXT: s_bfe_u32 s11, s1, 0x40014 -; GFX9-DL-NEXT: s_bfe_u32 s12, s1, 0x40018 -; GFX9-DL-NEXT: s_lshr_b32 s13, s1, 28 -; GFX9-DL-NEXT: s_and_b32 s14, s1, 15 -; GFX9-DL-NEXT: s_bfe_u32 s15, s1, 0x40004 -; GFX9-DL-NEXT: s_bfe_u32 s16, s1, 0x40008 +; GFX9-DL-NEXT: s_bfe_u32 s3, s1, 0x40010 +; GFX9-DL-NEXT: s_bfe_u32 s10, s2, 0x40010 +; GFX9-DL-NEXT: s_bfe_u32 s11, s2, 0x40014 +; GFX9-DL-NEXT: s_bfe_u32 s12, s2, 0x40018 +; GFX9-DL-NEXT: s_lshr_b32 s13, s2, 28 +; GFX9-DL-NEXT: s_and_b32 s14, s2, 15 +; GFX9-DL-NEXT: s_bfe_u32 s15, s2, 0x40004 +; GFX9-DL-NEXT: s_bfe_u32 s16, s2, 0x40008 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s10 -; GFX9-DL-NEXT: s_bfe_u32 s1, s1, 0x4000c -; GFX9-DL-NEXT: s_bfe_u32 s4, s0, 0x40014 +; GFX9-DL-NEXT: s_bfe_u32 s2, s2, 0x4000c +; GFX9-DL-NEXT: s_bfe_u32 s4, s1, 0x40014 ; GFX9-DL-NEXT: v_mov_b32_e32 v4, s11 -; GFX9-DL-NEXT: s_bfe_u32 s5, s0, 0x40018 +; GFX9-DL-NEXT: s_bfe_u32 s5, s1, 0x40018 ; GFX9-DL-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-DL-NEXT: s_lshr_b32 s6, s0, 28 +; GFX9-DL-NEXT: s_lshr_b32 s6, s1, 28 ; GFX9-DL-NEXT: v_mov_b32_e32 v6, s13 -; GFX9-DL-NEXT: s_and_b32 s7, s0, 15 +; GFX9-DL-NEXT: s_and_b32 s7, s1, 15 ; GFX9-DL-NEXT: v_mov_b32_e32 v7, s14 -; GFX9-DL-NEXT: s_bfe_u32 s8, s0, 0x40004 +; GFX9-DL-NEXT: s_bfe_u32 s8, s1, 0x40004 ; GFX9-DL-NEXT: v_mov_b32_e32 v8, s15 -; GFX9-DL-NEXT: s_bfe_u32 
s9, s0, 0x40008 +; GFX9-DL-NEXT: s_bfe_u32 s9, s1, 0x40008 ; GFX9-DL-NEXT: v_mov_b32_e32 v9, s16 -; GFX9-DL-NEXT: s_bfe_u32 s0, s0, 0x4000c -; GFX9-DL-NEXT: v_mov_b32_e32 v10, s1 +; GFX9-DL-NEXT: s_bfe_u32 s1, s1, 0x4000c +; GFX9-DL-NEXT: v_mov_b32_e32 v10, s2 ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v3, s3, v3 ; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v4, s4, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v5, s5, v5 @@ -2534,12 +2534,12 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: v_or_b32_sdwa v4, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-DL-NEXT: v_or_b32_e32 v5, v7, v8 ; GFX9-DL-NEXT: v_mul_lo_u16_e32 v9, s9, v9 -; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v10, s0, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-DL-NEXT: v_and_b32_e32 v5, s2, v5 +; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v10, s1, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-DL-NEXT: v_and_b32_e32 v5, s0, v5 ; GFX9-DL-NEXT: v_or_b32_sdwa v6, v9, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-DL-NEXT: v_or_b32_e32 v6, v5, v6 ; GFX9-DL-NEXT: v_lshrrev_b32_e32 v7, 8, v6 -; GFX9-DL-NEXT: v_and_b32_e32 v3, s2, v3 +; GFX9-DL-NEXT: v_and_b32_e32 v3, s0, v3 ; GFX9-DL-NEXT: v_or_b32_e32 v4, v3, v4 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) ; GFX9-DL-NEXT: v_add_u32_e32 v2, v5, v2 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index b108e26375366..bca00f69e25cd 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -524,7 +524,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; GCN: {{^; %bb.[0-9]}}: ; GCN: s_mov_b64 exec, -; GCN: s_cbranch_vccnz [[BB2]] +; GCN: s_cbranch_execnz [[BB2]] define amdgpu_kernel void @broken_phi_bb(i32 %arg, 
i32 %arg1) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll index b2acc37493e43..d82d90564aa4d 100644 --- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -158,8 +158,8 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) { ; SI-NEXT: ; %bb.4: ; %loop.exit.guard ; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; SI-NEXT: s_or_b64 exec, exec, s[2:3] -; SI-NEXT: s_and_b64 vcc, exec, 0 -; SI-NEXT: s_cbranch_vccz BB3_2 +; SI-NEXT: s_mov_b64 vcc, 0 +; SI-NEXT: s_branch BB3_2 ; SI-NEXT: BB3_5: ; %UnifiedReturnBlock ; SI-NEXT: s_endpgm ; IR-LABEL: @infinite_loop_nest_ret( diff --git a/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir b/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir index 3011da138c760..c6bb9dd0b1afe 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir @@ -338,3 +338,200 @@ body: | S_CBRANCH_VCCZ %bb.1, implicit killed $vcc S_ENDPGM 0 ... +--- +# GCN-LABEL: name: andn2_execz_mov_vccz +# GCN-NOT: S_MOV_ +# GCN-NOT: S_ANDN2_ +# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec +name: andn2_execz_mov_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 0 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: andn2_branch_mov_vccz +# GCN-NOT: S_MOV_ +# GCN-NOT: S_ANDN2_ +# GCN: S_BRANCH %bb.1 +name: andn2_branch_mov_vccz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 -1 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... 
+--- +# GCN-LABEL: name: andn2_execnz_mov_vccnz +# GCN-NOT: S_MOV_ +# GCN-NOT: S_ANDN2_ +# GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec +name: andn2_execnz_mov_vccnz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 0 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: andn2_no_branch_mov_vccnz +# GCN-NOT: S_MOV_ +# GCN-NOT: S_ANDN2_ +# GCN-NOT: S_CBRANCH +# GCN-NOT: S_BRANCH +name: andn2_no_branch_mov_vccnz +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 -1 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: and_0_mov +# GCN: bb.2: +# GCN-NOT: S_AND +# GCN: $vcc = S_MOV_B64 0 +# GCN-NEXT: S_BRANCH %bb.1 +name: and_0_mov +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 0 + $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: andn2_m1_mov +# GCN: bb.2: +# GCN-NOT: S_ANDN2 +# GCN: $vcc = S_MOV_B64 0 +# GCN-NEXT: S_BRANCH %bb.1 +name: andn2_m1_mov +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 -1 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: and_m1_mov +# GCN: bb.2: +# GCN-NOT: S_AND +# GCN: $vcc = S_MOV_B64 $exec +# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec +name: and_m1_mov +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 -1 + $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_ENDPGM 0 +... 
+--- +# GCN-LABEL: name: andn2_0_mov +# GCN: bb.2: +# GCN-NOT: S_ANDN2 +# GCN: $vcc = S_MOV_B64 $exec +# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec +name: andn2_0_mov +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 0 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: and_0_scc_req +# GCN: bb.2: +# GCN-NOT: S_MOV_ +# GCN: S_AND_ +# GCN-NEXT: S_BRANCH %bb.1 +name: and_0_scc_req +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 0 + $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_ENDPGM 0 +... +--- +# GCN-LABEL: name: andn2_m1_scc_req +# GCN: bb.2: +# GCN-NOT: S_MOV_ +# GCN: S_ANDN2_ +# GCN-NEXT: S_BRANCH %bb.1 +name: andn2_m1_scc_req +body: | + bb.0: + S_NOP 0 + + bb.1: + S_NOP 0 + + bb.2: + $sgpr0_sgpr1 = S_MOV_B64 -1 + $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index c53f2b07aa7c9..817e3e5ca28c7 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -681,27 +681,27 @@ define amdgpu_kernel void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* % ; ; VI-LABEL: dynamic_insertelement_v4i32: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x10 -; VI-NEXT: s_load_dword s6, s[4:5], 0x20 -; VI-NEXT: s_load_dword s4, s[4:5], 0x44 -; VI-NEXT: s_mov_b32 s3, 0x1100f000 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_cmp_eq_u32 s6, 3 -; VI-NEXT: s_cselect_b32 s5, s4, s11 -; VI-NEXT: s_cmp_eq_u32 s6, 2 -; VI-NEXT: s_cselect_b32 s7, s4, s10 -; VI-NEXT: s_cmp_eq_u32 s6, 1 -; VI-NEXT: s_cselect_b32 s9, s4, s9 -; VI-NEXT: s_cmp_eq_u32 s6, 0 -; VI-NEXT: s_cselect_b32 s4, s4, s8 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: v_mov_b32_e32 v2, s7 -; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_endpgm +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x10 +; VI-NEXT: s_load_dword s6, s[4:5], 0x20 +; VI-NEXT: s_load_dword s4, s[4:5], 0x44 +; VI-NEXT: s_mov_b32 s3, 0x1100f000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_cmp_eq_u32 s6, 3 +; VI-NEXT: s_cselect_b32 s5, s4, s11 +; VI-NEXT: s_cmp_eq_u32 s6, 2 +; VI-NEXT: s_cselect_b32 s7, s4, s10 +; VI-NEXT: s_cmp_eq_u32 s6, 1 +; VI-NEXT: s_cselect_b32 s9, s4, s9 +; VI-NEXT: s_cmp_eq_u32 s6, 0 +; VI-NEXT: s_cselect_b32 s4, s4, s8 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s9 +; VI-NEXT: v_mov_b32_e32 v2, s7 +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-NEXT: s_endpgm %vecins = insertelement <4 x i32> %a, i32 %val, i32 %b store 
<4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16 ret void @@ -1327,9 +1327,6 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add ; SI-NEXT: s_cbranch_vccz BB26_3 ; SI-NEXT: s_branch BB26_4 ; SI-NEXT: BB26_2: -; SI-NEXT: s_mov_b64 s[2:3], -1 -; SI-NEXT: s_andn2_b64 vcc, exec, s[2:3] -; SI-NEXT: s_cbranch_vccnz BB26_4 ; SI-NEXT: BB26_3: ; %if ; SI-NEXT: s_load_dword s1, s[6:7], 0x0 ; SI-NEXT: BB26_4: ; %endif @@ -1350,14 +1347,9 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add ; VI-NEXT: s_cbranch_scc0 BB26_2 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_load_dword s1, s[6:7], 0x4 -; VI-NEXT: s_mov_b64 s[2:3], 0 -; VI-NEXT: s_andn2_b64 vcc, exec, s[2:3] -; VI-NEXT: s_cbranch_vccz BB26_3 +; VI-NEXT: s_cbranch_execz BB26_3 ; VI-NEXT: s_branch BB26_4 ; VI-NEXT: BB26_2: -; VI-NEXT: s_mov_b64 s[2:3], -1 -; VI-NEXT: s_andn2_b64 vcc, exec, s[2:3] -; VI-NEXT: s_cbranch_vccnz BB26_4 ; VI-NEXT: BB26_3: ; %if ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_load_dword s1, s[6:7], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 8d5024bd14bf5..ae620b338e408 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -73,12 +73,12 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* % ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0xc ; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_load_dword s2, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_and_b32 s1, s4, 0xffff ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s0, s0, 0xffff0000 +; CI-NEXT: s_and_b32 s0, s2, 0xffff0000 ; CI-NEXT: s_or_b32 s0, s1, s0 ; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: flat_store_dword v[0:1], v2 @@ -95,11 +95,11 @@ define amdgpu_kernel void 
@s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> ad ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s0, s0, 16 +; GFX9-NEXT: s_lshr_b32 s0, s2, 16 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -133,18 +133,18 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> ad ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0xc ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: s_load_dword s0, s[2:3], 0x0 +; CI-NEXT: s_load_dword s2, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_and_b32 s1, s4, 0xffff +; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: s_and_b32 s0, s4, 0xffff ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshr_b32 s0, s0, 16 -; CI-NEXT: s_lshl_b32 s2, s0, 16 -; CI-NEXT: s_or_b32 s1, s1, s2 -; CI-NEXT: v_mov_b32_e32 v2, s1 +; CI-NEXT: s_lshr_b32 s1, s2, 16 +; CI-NEXT: s_lshl_b32 s2, s1, 16 +; CI-NEXT: s_or_b32 s0, s0, s2 +; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: ;;#ASMSTART -; CI-NEXT: ; use s0 +; CI-NEXT: ; use s1 ; CI-NEXT: ;;#ASMEND ; CI-NEXT: s_endpgm %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr @@ -192,12 +192,12 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0xc ; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_load_dword s2, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_lshr_b32 s1, s4, 16 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s0, s0, 
0xffff0000 +; CI-NEXT: s_and_b32 s0, s2, 0xffff0000 ; CI-NEXT: s_or_b32 s0, s1, s0 ; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: flat_store_dword v[0:1], v2 @@ -216,16 +216,16 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> a ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX9-NEXT: s_lshr_b32 s0, s4, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_lshr_b32 s1, s4, 16 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_pack_lh_b32_b16 s0, s1, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_pack_lh_b32_b16 s1, s0, s2 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s1 +; GFX9-NEXT: ; use s0 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; @@ -234,17 +234,17 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> a ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; VI-NEXT: s_load_dword s4, s[4:5], 0x10 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dword s2, s[2:3], 0x0 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_load_dword s0, s[2:3], 0x0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_lshr_b32 s1, s4, 16 +; VI-NEXT: s_lshr_b32 s0, s4, 16 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_and_b32 s0, s0, 0xffff0000 -; VI-NEXT: s_or_b32 s0, s1, s0 -; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_and_b32 s1, s2, 0xffff0000 +; VI-NEXT: s_or_b32 s1, s0, s1 +; VI-NEXT: v_mov_b32_e32 v2, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: ;;#ASMSTART -; VI-NEXT: ; use s1 +; VI-NEXT: ; use s0 ; VI-NEXT: ;;#ASMEND ; VI-NEXT: s_endpgm ; @@ -253,17 +253,17 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> a ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0x4 ; CI-NEXT: 
s_waitcnt lgkmcnt(0) +; CI-NEXT: s_load_dword s2, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_lshr_b32 s1, s4, 16 +; CI-NEXT: s_lshr_b32 s0, s4, 16 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s0, s0, 0xffff0000 -; CI-NEXT: s_or_b32 s0, s1, s0 -; CI-NEXT: v_mov_b32_e32 v2, s0 +; CI-NEXT: s_and_b32 s1, s2, 0xffff0000 +; CI-NEXT: s_or_b32 s1, s0, s1 +; CI-NEXT: v_mov_b32_e32 v2, s1 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: ;;#ASMSTART -; CI-NEXT: ; use s1 +; CI-NEXT: ; use s0 ; CI-NEXT: ;;#ASMEND ; CI-NEXT: s_endpgm %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr @@ -426,12 +426,12 @@ define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* % ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0xc ; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_load_dword s2, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_lshl_b32 s1, s4, 16 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_and_b32 s0, s0, 0xffff +; CI-NEXT: s_and_b32 s0, s2, 0xffff ; CI-NEXT: s_or_b32 s0, s0, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 ; CI-NEXT: flat_store_dword v[0:1], v2 @@ -624,15 +624,15 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v2 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; CI-NEXT: flat_load_dword v0, v[0:1] -; CI-NEXT: v_add_i32_e32 v2, vcc, s0, v2 -; CI-NEXT: v_mov_b32_e32 v3, s1 +; CI-NEXT: flat_load_dword v3, v[0:1] +; CI-NEXT: v_add_i32_e32 v0, vcc, s0, v2 +; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_lshr_b32 s0, s4, 16 -; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; CI-NEXT: v_or_b32_e32 v0, s0, v0 -; CI-NEXT: 
flat_store_dword v[2:3], v0 +; CI-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 +; CI-NEXT: v_or_b32_e32 v2, s0, v2 +; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -849,15 +849,15 @@ define amdgpu_kernel void @v_insertelement_v2f16_0(<2 x half> addrspace(1)* %out ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4500 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: global_load_dword v3, v[0:1], off +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 -; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4500 +; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v3 +; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_insertelement_v2f16_0: @@ -1107,13 +1107,13 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v2, 0x3e703e7 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[2:3], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; GFX9-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 ; GFX9-NEXT: v_bfi_b32 v2, s0, v2, v3 ; GFX9-NEXT: global_store_dword v[0:1], 
v2, off ; GFX9-NEXT: s_endpgm @@ -1125,13 +1125,13 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* ; VI-NEXT: v_mov_b32_e32 v2, 0x3e703e7 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: s_load_dword s4, s[4:5], 0x0 +; VI-NEXT: s_load_dword s2, s[2:3], 0x0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_load_dword s0, s[4:5], 0x0 -; VI-NEXT: s_load_dword s1, s[2:3], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshl_b32 s0, s0, 4 +; VI-NEXT: s_lshl_b32 s0, s4, 4 ; VI-NEXT: s_lshl_b32 s0, 0xffff, s0 -; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v3, s2 ; VI-NEXT: v_bfi_b32 v2, s0, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -1143,13 +1143,13 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* ; CI-NEXT: v_mov_b32_e32 v2, 0x3e703e7 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: s_load_dword s4, s[4:5], 0x0 +; CI-NEXT: s_load_dword s2, s[2:3], 0x0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_load_dword s0, s[4:5], 0x0 -; CI-NEXT: s_load_dword s1, s[2:3], 0x0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshl_b32 s0, s0, 4 +; CI-NEXT: s_lshl_b32 s0, s4, 4 ; CI-NEXT: s_lshl_b32 s0, 0xffff, s0 -; CI-NEXT: v_mov_b32_e32 v3, s1 +; CI-NEXT: v_mov_b32_e32 v3, s2 ; CI-NEXT: v_bfi_b32 v2, s0, v2, v3 ; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm @@ -1240,24 +1240,25 @@ define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(<2 x half> addrspa ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x10 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v4 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: global_load_dword v0, 
v[0:1], off -; GFX9-NEXT: global_load_dword v1, v[2:3], off -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s0, v4 -; GFX9-NEXT: s_mov_b32 s0, 0xffff -; GFX9-NEXT: v_mov_b32_e32 v5, s1 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s0 +; GFX9-NEXT: global_load_dword v2, v[2:3], off +; GFX9-NEXT: global_load_dword v3, v[0:1], off +; GFX9-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v4 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: s_mov_b32 s0, 0x12341234 -; GFX9-NEXT: v_bfi_b32 v0, v1, s0, v0 -; GFX9-NEXT: global_store_dword v[4:5], v0, off +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2 +; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_bfi_b32 v2, v2, s0, v3 +; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_insertelement_v2f16_dynamic_vgpr: @@ -1266,24 +1267,25 @@ define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(<2 x half> addrspa ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x10 ; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 -; VI-NEXT: s_mov_b32 s0, 0xffff -; VI-NEXT: v_mov_b32_e32 v5, s1 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; VI-NEXT: v_lshlrev_b32_e64 v1, v1, s0 +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: s_mov_b32 
s2, 0xffff +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v4 +; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: s_mov_b32 s0, 0x12341234 -; VI-NEXT: v_bfi_b32 v0, v1, s0, v0 -; VI-NEXT: flat_store_dword v[4:5], v0 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_lshlrev_b32_e32 v2, 4, v2 +; VI-NEXT: v_lshlrev_b32_e64 v2, v2, s2 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_bfi_b32 v2, v2, s0, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: v_insertelement_v2f16_dynamic_vgpr: @@ -1299,17 +1301,17 @@ define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(<2 x half> addrspa ; CI-NEXT: v_add_i32_e32 v2, vcc, s4, v4 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; CI-NEXT: flat_load_dword v2, v[2:3] -; CI-NEXT: flat_load_dword v0, v[0:1] -; CI-NEXT: v_add_i32_e32 v4, vcc, s0, v4 -; CI-NEXT: v_mov_b32_e32 v5, s1 +; CI-NEXT: flat_load_dword v3, v[0:1] +; CI-NEXT: v_add_i32_e32 v0, vcc, s0, v4 +; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_mov_b32 s0, 0x12341234 -; CI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; CI-NEXT: v_lshlrev_b32_e32 v1, 4, v2 -; CI-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 +; CI-NEXT: v_lshlrev_b32_e32 v2, 4, v2 +; CI-NEXT: v_lshl_b32_e32 v2, 0xffff, v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CI-NEXT: v_bfi_b32 v0, v1, s0, v0 -; CI-NEXT: flat_store_dword v[4:5], v0 +; CI-NEXT: v_bfi_b32 v2, v2, s0, v3 +; CI-NEXT: flat_store_dword v[0:1], v2 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -1684,26 +1686,26 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(<4 x i16> addrspac ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x10 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: 
v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v2, v[0:1], off ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 s0, 0xffff -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: s_mov_b32 s3, 0 +; GFX9-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s0, v4 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 4, v4 -; GFX9-NEXT: v_lshlrev_b64 v[4:5], v4, s[0:1] -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_bfi_b32 v1, v5, s0, v1 -; GFX9-NEXT: v_bfi_b32 v0, v4, s0, v0 -; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: v_bfi_b32 v1, v3, s1, v1 +; GFX9-NEXT: v_bfi_b32 v0, v2, s1, v0 +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_insertelement_v4i16_dynamic_vgpr: @@ -1717,17 +1719,17 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(<4 x i16> addrspac ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v4, v[0:1] ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_mov_b32 s0, 0xffff +; VI-NEXT: s_mov_b32 s2, 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: s_and_b32 s2, s4, s0 -; VI-NEXT: s_mov_b32 s1, 0 -; VI-NEXT: s_lshl_b32 s3, s2, 16 +; VI-NEXT: s_mov_b32 s3, 0 +; VI-NEXT: s_and_b32 s1, s4, s2 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: s_lshl_b32 s0, s1, 16 +; VI-NEXT: 
s_or_b32 s0, s1, s0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v4, 4, v4 -; VI-NEXT: v_lshlrev_b64 v[4:5], v4, s[0:1] -; VI-NEXT: s_or_b32 s0, s2, s3 +; VI-NEXT: v_lshlrev_b64 v[4:5], v4, s[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_bfi_b32 v1, v5, s0, v1 ; VI-NEXT: v_bfi_b32 v0, v4, s0, v0 @@ -1736,26 +1738,26 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(<4 x i16> addrspac ; ; CI-LABEL: v_insertelement_v4i16_dynamic_vgpr: ; CI: ; %bb.0: -; CI-NEXT: flat_load_dword v4, v[0:1] ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; CI-NEXT: s_load_dword s4, s[4:5], 0x4 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; CI-NEXT: s_mov_b32 s6, 0xffff -; CI-NEXT: s_mov_b32 s7, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v2 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CI-NEXT: flat_load_dword v4, v[0:1] ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; CI-NEXT: s_mov_b32 s2, 0xffff +; CI-NEXT: s_mov_b32 s3, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: s_lshl_b32 s1, s4, 16 -; CI-NEXT: s_and_b32 s3, s4, s6 +; CI-NEXT: s_and_b32 s4, s4, s2 ; CI-NEXT: v_add_i32_e32 v2, vcc, s0, v2 -; CI-NEXT: s_or_b32 s0, s3, s1 +; CI-NEXT: s_or_b32 s0, s4, s1 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) ; CI-NEXT: v_lshlrev_b32_e32 v4, 4, v4 -; CI-NEXT: v_lshl_b64 v[4:5], s[6:7], v4 +; CI-NEXT: v_lshl_b64 v[4:5], s[2:3], v4 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: v_bfi_b32 v1, v5, s0, v1 ; CI-NEXT: v_bfi_b32 v0, v4, s0, v0 @@ -1785,19 +1787,19 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s4 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, 
s4 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_mov_b32 s3, 0 +; GFX9-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NEXT: s_lshl_b32 s1, s5, 4 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 s0, 0xffff -; GFX9-NEXT: s_lshl_b32 s3, s5, 4 -; GFX9-NEXT: v_mov_b32_e32 v4, s2 -; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s3 -; GFX9-NEXT: v_mov_b32_e32 v5, s2 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 +; GFX9-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NEXT: v_mov_b32_e32 v5, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_bfi_b32 v1, s1, v5, v1 -; GFX9-NEXT: v_bfi_b32 v0, s0, v4, v0 +; GFX9-NEXT: v_bfi_b32 v1, s1, v4, v1 +; GFX9-NEXT: v_bfi_b32 v0, s0, v5, v0 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm ; @@ -1807,19 +1809,19 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x10 ; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_mov_b32 s0, 0xffff +; VI-NEXT: s_mov_b32 s2, 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: s_mov_b32 s1, 0 -; VI-NEXT: s_lshl_b32 s2, s5, 4 -; VI-NEXT: s_and_b32 s3, s4, s0 -; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 -; VI-NEXT: s_lshl_b32 s2, s3, 16 -; VI-NEXT: s_or_b32 s2, s3, s2 +; VI-NEXT: s_mov_b32 s3, 0 +; VI-NEXT: s_lshl_b32 s1, s5, 4 +; VI-NEXT: s_and_b32 s4, s4, s2 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 +; VI-NEXT: s_lshl_b32 s2, s4, 16 +; VI-NEXT: s_or_b32 s2, s4, s2 ; VI-NEXT: v_mov_b32_e32 v4, s2 ; VI-NEXT: v_mov_b32_e32 v5, s2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc @@ -1839,15 +1841,15 @@ define amdgpu_kernel 
void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa ; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v2 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; CI-NEXT: v_add_i32_e32 v2, vcc, s0, v2 -; CI-NEXT: s_mov_b32 s0, 0xffff -; CI-NEXT: s_and_b32 s2, s4, s0 -; CI-NEXT: s_lshl_b32 s4, s4, 16 +; CI-NEXT: s_mov_b32 s2, 0xffff ; CI-NEXT: v_mov_b32_e32 v3, s1 -; CI-NEXT: s_or_b32 s2, s2, s4 -; CI-NEXT: s_mov_b32 s1, 0 -; CI-NEXT: s_lshl_b32 s3, s5, 4 -; CI-NEXT: s_lshl_b64 s[0:1], s[0:1], s3 +; CI-NEXT: s_and_b32 s6, s4, s2 +; CI-NEXT: s_mov_b32 s3, 0 +; CI-NEXT: s_lshl_b32 s1, s5, 4 +; CI-NEXT: s_lshl_b32 s4, s4, 16 +; CI-NEXT: v_add_i32_e32 v2, vcc, s0, v2 +; CI-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 +; CI-NEXT: s_or_b32 s2, s6, s4 ; CI-NEXT: v_mov_b32_e32 v4, s2 ; CI-NEXT: v_mov_b32_e32 v5, s2 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir index a8c930d27c9be..1922adf5ee6cf 100644 --- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -10,38 +10,9 @@ define amdgpu_kernel void @vmem_gt_8dw_store() { ret void } define amdgpu_kernel void @readwrite_lane() { ret void } define amdgpu_kernel void @rfe() { ret void } - define amdgpu_kernel void @s_mov_fed_b32() { ret void } define amdgpu_kernel void @s_movrel() { ret void } define amdgpu_kernel void @v_interp() { ret void } define amdgpu_kernel void @dpp() { ret void } - - define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) { - entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !5, metadata !11), !dbg !12 - ret void - } - - declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - - !llvm.dbg.cu = 
!{!0} - !llvm.module.flags = !{!3, !4} - - !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) - !1 = !DIFile(filename: "test01.cl", directory: "/dev/null") - !2 = !{} - !3 = !{i32 2, !"Dwarf Version", i32 2} - !4 = !{i32 2, !"Debug Info Version", i32 3} - !5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9) - !6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) - !7 = !DISubroutineType(types: !8) - !8 = !{null, !9} - !9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32) - !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) - !11 = !DIExpression() - !12 = !DILocation(line: 1, column: 30, scope: !6) - ... --- # GCN-LABEL: name: div_fmas @@ -267,7 +238,7 @@ body: | # GCN-LABEL: name: readwrite_lane # GCN-LABEL: bb.0: -# GCN: V_ADD_I32 +# GCN: V_ADD_CO_U32 # GCN: S_NOP # GCN: S_NOP # GCN: S_NOP @@ -275,7 +246,7 @@ body: | # GCN: V_READLANE_B32 # GCN-LABEL: bb.1: -# GCN: V_ADD_I32 +# GCN: V_ADD_CO_U32 # GCN: S_NOP # GCN: S_NOP # GCN: S_NOP @@ -283,7 +254,7 @@ body: | # GCN: V_WRITELANE_B32 # GCN-LABEL: bb.2: -# GCN: V_ADD_I32 +# GCN: V_ADD_CO_U32 # GCN: S_NOP # GCN: S_NOP # GCN: S_NOP @@ -291,7 +262,7 @@ body: | # GCN: V_READLANE_B32 # GCN-LABEL: bb.3: -# GCN: V_ADD_I32 +# GCN: V_ADD_CO_U32 # GCN: S_NOP # GCN: S_NOP # GCN: S_NOP @@ -302,23 +273,23 @@ name: readwrite_lane body: | bb.0: - $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec + $vgpr0,$sgpr0_sgpr1 = V_ADD_CO_U32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec $sgpr4 = V_READLANE_B32 $vgpr4, $sgpr0 S_BRANCH %bb.1 bb.1: - $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec + 
$vgpr0,$sgpr0_sgpr1 = V_ADD_CO_U32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec $vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4 S_BRANCH %bb.2 bb.2: - $vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vgpr0,implicit $vcc = V_ADD_CO_U32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec $sgpr4 = V_READLANE_B32 $vgpr4, $vcc_lo S_BRANCH %bb.3 bb.3: $m0 = S_MOV_B32 $sgpr4 - $vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vgpr0,implicit $vcc = V_ADD_CO_U32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec $vgpr4 = V_WRITELANE_B32 $m0, $vcc_lo, $vgpr4 S_ENDPGM 0 @@ -356,35 +327,6 @@ body: | ... --- -# GCN-LABEL: name: s_mov_fed_b32 - -# GCN-LABEL: bb.0: -# GCN: S_MOV_FED_B32 -# GFX9: S_NOP -# GCN-NEXT: S_MOV_B32 - -# GCN-LABEL: bb.1: -# GCN: S_MOV_FED_B32 -# GFX9: S_NOP -# GCN-NEXT: V_MOV_B32 -name: s_mov_fed_b32 - -body: | - bb.0: - $sgpr0 = S_MOV_FED_B32 $sgpr0 - $sgpr0 = S_MOV_B32 $sgpr0 - S_BRANCH %bb.1 - - bb.1: - $sgpr0 = S_MOV_FED_B32 $sgpr0 - $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec - S_ENDPGM 0 - -... - -... ---- - # GCN-LABEL: name: s_movrel # GCN-LABEL: bb.0: @@ -513,51 +455,3 @@ body: | $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec S_ENDPGM 0 ... 
---- -name: mov_fed_hazard_crash_on_dbg_value -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$sgpr4_sgpr5' } - - { reg: '$sgpr6_sgpr7' } - - { reg: '$sgpr9' } - - { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 16 - offsetAdjustment: 0 - maxAlignment: 8 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -stack: - - { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 } - - { id: 1, offset: 8, size: 4, alignment: 4 } -body: | - bb.0.entry: - liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3 - - $flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc - $flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc - DBG_VALUE $noreg, 2, !5, !11, debug-location !12 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5 - $sgpr8 = S_MOV_B32 $sgpr5 - $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec - BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4) - $sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5 - $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec - BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr) - S_ENDPGM 0 - -... 
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll index 38fddcafaa290..49c2bf08ba3e1 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -855,10 +855,10 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, ; multiple. ; FUNC-LABEL: {{^}}packed_struct_argument_alignment: ; HSA-GFX9: kernarg_segment_byte_size = 28 -; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 -; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4 ; HSA-GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17 ; HSA-GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13 +; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4 define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) { %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll index c2aa65bddb7e0..20ef90db98319 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll @@ -5,44 +5,37 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) ; Test ballot(0) -define i32 @test0() { -; CHECK-LABEL: test0: +define amdgpu_cs i32 @constant_false() { +; CHECK-LABEL: constant_false: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ; implicit-def: $vcc_hi -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: ; return to shader part epilog %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0) ret i32 %ballot } ; Test ballot(1) -define i32 @test1() { -; CHECK-LABEL: test1: +define amdgpu_cs i32 @constant_true() { +; CHECK-LABEL: 
constant_true: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_mov_b32_e32 v0, exec_lo +; CHECK-NEXT: s_mov_b32 s0, exec_lo ; CHECK-NEXT: ; implicit-def: $vcc_hi -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: ; return to shader part epilog %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1) ret i32 %ballot } ; Test ballot of a non-comparison operation -define i32 @test2(i32 %x) { -; CHECK-LABEL: test2: +define amdgpu_cs i32 @non_compare(i32 %x) { +; CHECK-LABEL: non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 ; CHECK-NEXT: ; implicit-def: $vcc_hi -; CHECK-NEXT: v_cmp_ne_u32_e64 s4, 0, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; CHECK-NEXT: ; return to shader part epilog %trunc = trunc i32 %x to i1 %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %trunc) ret i32 %ballot @@ -50,43 +43,34 @@ define i32 @test2(i32 %x) { ; Test ballot of comparisons -define i32 @test3(i32 %x, i32 %y) { -; CHECK-LABEL: test3: +define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) { +; CHECK-LABEL: compare_ints: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_eq_u32_e64 s4, v0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s0, v0, v1 ; CHECK-NEXT: ; implicit-def: $vcc_hi -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: ; return to shader part epilog %cmp = icmp eq i32 %x, %y %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) ret i32 %ballot } -define i32 @test4(i32 %x) { -; CHECK-LABEL: test4: +define amdgpu_cs i32 @compare_int_with_constant(i32 %x) { +; CHECK-LABEL: compare_int_with_constant: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; 
CHECK-NEXT: v_cmp_lt_i32_e64 s4, 0x62, v0 +; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x62, v0 ; CHECK-NEXT: ; implicit-def: $vcc_hi -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: ; return to shader part epilog %cmp = icmp sge i32 %x, 99 %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) ret i32 %ballot } -define i32 @test5(float %x, float %y) { -; CHECK-LABEL: test5: +define amdgpu_cs i32 @compare_floats(float %x, float %y) { +; CHECK-LABEL: compare_floats: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_gt_f32_e64 s4, v0, v1 +; CHECK-NEXT: v_cmp_gt_f32_e64 s0, v0, v1 ; CHECK-NEXT: ; implicit-def: $vcc_hi -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: ; return to shader part epilog %cmp = fcmp ogt float %x, %y %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) ret i32 %ballot diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll index 97678bf309cbc..69066011a56c4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -5,41 +5,36 @@ declare i64 @llvm.amdgcn.ballot.i64(i1) ; Test ballot(0) -define i64 @test0() { -; CHECK-LABEL: test0: +define amdgpu_cs i64 @constant_false() { +; CHECK-LABEL: constant_false: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ; return to shader part epilog %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0) ret i64 %ballot } ; Test ballot(1) -define i64 @test1() { -; CHECK-LABEL: test1: +define amdgpu_cs i64 @constant_true() { +; CHECK-LABEL: constant_true: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: 
v_mov_b32_e32 v0, exec_lo -; CHECK-NEXT: v_mov_b32_e32 v1, exec_hi -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: s_mov_b32 s0, exec_lo +; CHECK-NEXT: s_mov_b32 s1, exec_hi +; CHECK-NEXT: ; return to shader part epilog %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1) ret i64 %ballot } ; Test ballot of a non-comparison operation -define i64 @test2(i32 %x) { -; CHECK-LABEL: test2: +define amdgpu_cs i64 @non_compare(i32 %x) { +; CHECK-LABEL: non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: v_mov_b32_e32 v1, s5 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v0 +; CHECK-NEXT: ; return to shader part epilog %trunc = trunc i32 %x to i1 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc) ret i64 %ballot @@ -47,41 +42,32 @@ define i64 @test2(i32 %x) { ; Test ballot of comparisons -define i64 @test3(i32 %x, i32 %y) { -; CHECK-LABEL: test3: +define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) { +; CHECK-LABEL: compare_ints: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v1 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: v_mov_b32_e32 v1, s5 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog %cmp = icmp eq i32 %x, %y %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) ret i64 %ballot } -define i64 @test4(i32 %x) { -; CHECK-LABEL: test4: +define amdgpu_cs i64 @compare_int_with_constant(i32 %x) { +; CHECK-LABEL: compare_int_with_constant: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x62 -; CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], s4, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: v_mov_b32_e32 v1, s5 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: s_movk_i32 s0, 
0x62 +; CHECK-NEXT: v_cmp_lt_i32_e64 s[0:1], s0, v0 +; CHECK-NEXT: ; return to shader part epilog %cmp = icmp sge i32 %x, 99 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) ret i64 %ballot } -define i64 @test5(float %x, float %y) { -; CHECK-LABEL: test5: +define amdgpu_cs i64 @compare_floats(float %x, float %y) { +; CHECK-LABEL: compare_floats: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, v1 -; CHECK-NEXT: v_mov_b32_e32 v0, s4 -; CHECK-NEXT: v_mov_b32_e32 v1, s5 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog %cmp = fcmp ogt float %x, %y %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) ret i64 %ballot diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll index e47dc1ea61c92..42fc7ccbc15d4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -11,6 +11,7 @@ ;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_swap v0, v[1:2], s[0:3], 0 idxen offen glc +;SICI: v_mov_b32_e32 v1, 0x2000 ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen offset:42 glc ;CHECK-DAG: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll index e7668f0f76df2..822fd79c2ec38 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll @@ -79,7 +79,7 @@ main_body: ;CHECK-NOT: s_waitcnt ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 idxen glc ;CHECK: s_waitcnt vmcnt(0) -;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc +;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, 
s[0:3], 0 idxen glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen glc diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll index 3f3807b274dfb..5ba8edb2c1c04 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll @@ -1559,24 +1559,22 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out ; VI-LABEL: simplify_bfe_u32_multi_use_arg: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s10, s2 -; VI-NEXT: s_mov_b32 s11, s3 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_mov_b32 s3, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s0, s6 +; VI-NEXT: s_mov_b32 s1, s7 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_and_b32_e32 v0, 63, v0 ; VI-NEXT: v_bfe_u32 v1, v0, 2, 2 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; VI-NEXT: buffer_store_dword v1, off, s[8:11], 0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll index 27002f6bfb8b5..b86f444440cec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -118,20 +118,20 @@ define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add ; GFX9-LABEL: cos_v2f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x3118 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: v_mov_b32_e32 v1, 0x3118 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mul_f16_e32 v1, 0.15915494, v0 -; GFX9-NEXT: v_cos_f16_e32 v3, v1 -; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_cos_f16_e32 v2, v0 +; GFX9-NEXT: v_mul_f16_e32 v2, 0.15915494, v0 +; GFX9-NEXT: v_cos_f16_e32 v2, v2 +; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_cos_f16_e32 v3, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v3 +; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm %a.val = load <2 x half>, <2 x half> addrspace(1)* %a diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll index 7e7af3e586a75..70d6c2c173c11 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -173,8 +173,8 @@ define amdgpu_kernel void @fma_v2f16( ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[C_F16_1]], s[[A_F16]], v[[B_F16_1]] -; VI-DAG: v_fma_f16 v[[R_F16_0:[0-9]+]], v[[C_V2_F16]], s[[A_F16]], v[[B_V2_F16]] +; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[B_F16_1]], s[[A_F16]], v[[C_F16_1]] +; VI-DAG: v_fma_f16 
v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], s[[A_F16]], v[[C_V2_F16]] ; GFX9: v_pk_fma_f16 v[[R_V2_F16:[0-9]+]], v[[C_V2_F16]], s[[A_F16]], v[[B_V2_F16]] @@ -198,8 +198,9 @@ define amdgpu_kernel void @fma_v2f16_imm_a( ; SI: buffer_load_dword v[[C_V2_F16:[0-9]+]] ; SI: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; VIGFX9: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; VIGFX9: buffer_load_dword v[[C_V2_F16:[0-9]+]] +; VI: buffer_load_dword v[[C_V2_F16:[0-9]+]] +; VIGFX9: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GFX9: buffer_load_dword v[[C_V2_F16:[0-9]+]] ; SI: s_mov_b32 s[[B_F32:[0-9]+]], 0x40400000{{$}} ; VIGFX9: s_movk_i32 s[[B_F16:[0-9]+]], 0x4200{{$}} @@ -243,8 +244,9 @@ define amdgpu_kernel void @fma_v2f16_imm_b( ; SI: buffer_load_dword v[[B_V2_F16:[0-9]+]] ; SI: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; VIGFX9: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GFX9: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; VIGFX9: buffer_load_dword v[[B_V2_F16:[0-9]+]] +; VI: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; SI: s_mov_b32 s[[C_F32:[0-9]+]], 0x40400000{{$}} ; VIGFX9: s_movk_i32 s[[C_F16:[0-9]+]], 0x4200{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index c0c6f4f4b93b0..60f1e71a7a222 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -43,17 +43,17 @@ define amdgpu_kernel void @maxnum_f16( ; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_mov_b32 s14, s2 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: s_mov_b32 s12, s6 +; VI-NEXT: s_mov_b32 s13, s7 +; VI-NEXT: s_mov_b32 s15, s3 +; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 -; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; VI-NEXT: 
buffer_load_ushort v0, off, s[12:15], 0 ; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s0, s4 +; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_max_f16_e32 v0, v0, v0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -68,17 +68,17 @@ define amdgpu_kernel void @maxnum_f16( ; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_mov_b32 s14, s2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 +; GFX9-NEXT: s_mov_b32 s12, s6 +; GFX9-NEXT: s_mov_b32 s13, s7 +; GFX9-NEXT: s_mov_b32 s15, s3 +; GFX9-NEXT: s_mov_b32 s10, s2 ; GFX9-NEXT: s_mov_b32 s11, s3 -; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; GFX9-NEXT: buffer_load_ushort v0, off, s[12:15], 0 ; GFX9-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -292,17 +292,17 @@ define amdgpu_kernel void @maxnum_v2f16( ; GFX9-LABEL: maxnum_v2f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s6, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s7, s[0:1], 0x0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_load_dword s5, s[8:9], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 +; GFX9-NEXT: v_pk_max_f16 v1, s6, s6 +; GFX9-NEXT: v_pk_max_f16 v0, s7, s7 ; GFX9-NEXT: v_pk_max_f16 v0, v1, v0 
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm @@ -362,18 +362,18 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a( ; ; GFX9-LABEL: maxnum_v2f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s0, 0x44004200 +; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x44004200 -; GFX9-NEXT: v_pk_max_f16 v0, v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v0, v0, s0 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %b) #0 { @@ -429,18 +429,18 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b( ; ; GFX9-LABEL: maxnum_v2f16_imm_b: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s0, 0x42004400 +; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x42004400 -; GFX9-NEXT: v_pk_max_f16 v0, v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: 
v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v0, v0, s0 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) #0 { @@ -735,12 +735,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a( ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v2, s4, s4 +; GFX9-NEXT: v_pk_max_f16 v0, s7, s7 +; GFX9-NEXT: v_pk_max_f16 v2, s6, s6 ; GFX9-NEXT: v_pk_max_f16 v1, v0, s8 ; GFX9-NEXT: v_pk_max_f16 v0, v2, s9 ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index fd3e3212a8ceb..6b0811d4bcd97 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -43,17 +43,17 @@ define amdgpu_kernel void @minnum_f16_ieee( ; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_mov_b32 s14, s2 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: s_mov_b32 s12, s6 +; VI-NEXT: s_mov_b32 s13, s7 +; VI-NEXT: s_mov_b32 s15, s3 +; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 -; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 ; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s0, s4 +; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_max_f16_e32 v0, v0, v0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -68,17 +68,17 @@ 
define amdgpu_kernel void @minnum_f16_ieee( ; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_mov_b32 s14, s2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 +; GFX9-NEXT: s_mov_b32 s12, s6 +; GFX9-NEXT: s_mov_b32 s13, s7 +; GFX9-NEXT: s_mov_b32 s15, s3 +; GFX9-NEXT: s_mov_b32 s10, s2 ; GFX9-NEXT: s_mov_b32 s11, s3 -; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; GFX9-NEXT: buffer_load_ushort v0, off, s[12:15], 0 ; GFX9-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -315,17 +315,17 @@ define amdgpu_kernel void @minnum_v2f16_ieee( ; GFX9-LABEL: minnum_v2f16_ieee: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s6, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s7, s[0:1], 0x0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_load_dword s5, s[8:9], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 +; GFX9-NEXT: v_pk_max_f16 v1, s6, s6 +; GFX9-NEXT: v_pk_max_f16 v0, s7, s7 ; GFX9-NEXT: v_pk_min_f16 v0, v1, v0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm @@ -415,18 +415,18 @@ define amdgpu_kernel void @minnum_v2f16_imm_a( ; ; GFX9-LABEL: minnum_v2f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 
0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s0, 0x44004200 +; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x44004200 -; GFX9-NEXT: v_pk_min_f16 v0, v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_min_f16 v0, v0, s0 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %b) #0 { @@ -482,18 +482,18 @@ define amdgpu_kernel void @minnum_v2f16_imm_b( ; ; GFX9-LABEL: minnum_v2f16_imm_b: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s0, 0x42004400 +; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x42004400 -; GFX9-NEXT: v_pk_min_f16 v0, v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_min_f16 v0, v0, s0 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) #0 { @@ -788,12 +788,12 @@ define amdgpu_kernel void 
@fmin_v4f16_imm_a( ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v2, s4, s4 +; GFX9-NEXT: v_pk_max_f16 v0, s7, s7 +; GFX9-NEXT: v_pk_max_f16 v2, s6, s6 ; GFX9-NEXT: v_pk_min_f16 v1, v0, s8 ; GFX9-NEXT: v_pk_min_f16 v0, v2, s9 ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll new file mode 100644 index 0000000000000..5618256dbcdfa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -0,0 +1,250 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s + +define i16 @v_powi_f16(i16 %l, i32 %r) { +; GCN-LABEL: v_powi_f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GCN-NEXT: v_log_f32_e32 v0, v0 +; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %l.cast = bitcast i16 %l to half + %res = call half @llvm.powi.f16(half %l.cast, i32 %r) + %res.cast = bitcast half %res to i16 + ret i16 %res.cast +} + +define float @v_powi_f32(float %l, i32 %r) { +; GCN-LABEL: v_powi_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_log_f32_e32 v0, v0 +; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float 
@llvm.powi.f32(float %l, i32 %r) + ret float %res +} + +define float @v_powi_0_f32(float %l) { +; GCN-LABEL: v_powi_0_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 1.0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 0) + ret float %res +} + +define float @v_powi_1_f32(float %l) { +; GCN-LABEL: v_powi_1_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 1) + ret float %res +} + +define float @v_powi_neg1_f32(float %l) { +; GFX7-LABEL: v_powi_neg1_f32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 +; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_powi_neg1_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 +; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 +; GFX8-NEXT: v_rcp_f32_e32 v3, v1 +; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 -1) + ret float %res +} + +define float @v_powi_2_f32(float %l) { +; GCN-LABEL: v_powi_2_f32: +; GCN: ; 
%bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 2) + ret float %res +} + +define float @v_powi_neg2_f32(float %l) { +; GFX7-LABEL: v_powi_neg2_f32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 +; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 +; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_powi_neg2_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 +; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 +; GFX8-NEXT: v_rcp_f32_e32 v3, v1 +; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 -2) + ret float %res +} + +define float @v_powi_4_f32(float %l) { +; GCN-LABEL: v_powi_4_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 4) + ret float %res +} + +define float @v_powi_8_f32(float %l) { +; GCN-LABEL: 
v_powi_8_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 8) + ret float %res +} + +define float @v_powi_16_f32(float %l) { +; GCN-LABEL: v_powi_16_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 16) + ret float %res +} + +define float @v_powi_128_f32(float %l) { +; GCN-LABEL: v_powi_128_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 128) + ret float %res +} + +define float @v_powi_neg128_f32(float %l) { +; GFX7-LABEL: v_powi_neg128_f32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 +; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 +; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 +; 
GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_powi_neg128_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 +; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 +; GFX8-NEXT: v_rcp_f32_e32 v3, v1 +; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 +; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 +; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 +; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 +; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 +; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 +; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 +; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %res = call float @llvm.powi.f32(float %l, i32 -128) + ret float %res +} + +; FIXME: f64 broken +; define double @v_powi_f64(double %l, i32 %r) { +; %res = call double @llvm.powi.f64(double %l, i32 %r) +; ret double %res +; } + +declare half @llvm.powi.f16(half, i32) #0 +declare float @llvm.powi.f32(float, i32) #0 +declare double @llvm.powi.f64(double, i32) #0 + +attributes #0 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll index 7d0d4eee1f042..76a218760e8e8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -77,15 +77,15 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 s[0:1], s[6:7] ; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 -; SI-NEXT: s_movk_i32 s9, 0xfc01 -; SI-NEXT: 
s_mov_b32 s7, 0xfffff -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_brev_b32 s8, -2 +; SI-NEXT: s_movk_i32 s7, 0xfc01 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: s_mov_b32 s0, -1 +; SI-NEXT: s_brev_b32 s6, -2 ; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_bfe_u32 v4, v3, 20, 11 -; SI-NEXT: v_add_i32_e32 v6, vcc, s9, v4 -; SI-NEXT: v_lshr_b64 v[4:5], s[6:7], v6 +; SI-NEXT: v_add_i32_e32 v6, vcc, s7, v4 +; SI-NEXT: v_lshr_b64 v[4:5], s[0:1], v6 ; SI-NEXT: v_and_b32_e32 v7, 0x80000000, v3 ; SI-NEXT: v_not_b32_e32 v4, v4 ; SI-NEXT: v_not_b32_e32 v5, v5 @@ -98,7 +98,7 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa ; SI-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc ; SI-NEXT: v_add_f64 v[6:7], v[2:3], -v[4:5] -; SI-NEXT: v_bfi_b32 v2, s8, v8, v3 +; SI-NEXT: v_bfi_b32 v2, s6, v8, v3 ; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc @@ -117,14 +117,14 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 -; CI-NEXT: s_brev_b32 s6, -2 +; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_mov_b32_e32 v8, 0x3ff00000 +; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_trunc_f64_e32 v[4:5], v[2:3] ; CI-NEXT: v_add_f64 v[6:7], v[2:3], -v[4:5] -; CI-NEXT: v_bfi_b32 v2, s6, v8, v3 +; CI-NEXT: v_bfi_b32 v2, s0, v8, v3 ; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 -; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_add_f64 v[2:3], v[4:5], v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll index bd08e37030284..f04c5b2ebf7ab 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll @@ -118,20 +118,20 @@ define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add ; GFX9-LABEL: sin_v2f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x3118 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: v_mov_b32_e32 v1, 0x3118 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mul_f16_e32 v1, 0.15915494, v0 -; GFX9-NEXT: v_sin_f16_e32 v3, v1 -; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_sin_f16_e32 v2, v0 +; GFX9-NEXT: v_mul_f16_e32 v2, 0.15915494, v0 +; GFX9-NEXT: v_sin_f16_e32 v2, v2 +; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_sin_f16_e32 v3, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v3 +; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm %a.val = load <2 x half>, <2 x half> addrspace(1)* %a diff --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll index ae8dad231a122..2ac06d9240d22 100644 --- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll +++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll @@ -119,12 +119,12 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b64 s[0:1], s[6:7] +; GCN-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 ; GCN-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GCN-NEXT: buffer_load_dword v0, v[1:2], s[0:3], 0 addr64 ; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] ; GCN-NEXT: v_mov_b32_e32 v4, v2 ; GCN-NEXT: 
s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v0, 0x800000, v0 +; GCN-NEXT: v_or_b32_e32 v0, 0x800000, v1 ; GCN-NEXT: v_mul_i32_i24_e32 v0, -7, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 ; GCN-NEXT: buffer_store_dwordx2 v[1:2], v[3:4], s[4:7], 0 addr64 diff --git a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll index ee9f8fa49a27f..a6196ce7e5708 100644 --- a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -87,23 +87,23 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ; VI-LABEL: v_lshr_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v5, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: v_add_u32_e32 v4, vcc, 4, v0 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[4:5] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b16_e32 v4, v1, v0 -; VI-NEXT: v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v0, v4, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshrrev_b16_e32 v3, v2, v5 +; VI-NEXT: v_lshrrev_b16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: 
flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: v_lshr_v2i16: @@ -117,14 +117,14 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_mov_b32 s0, 0xffff ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 -; CI-NEXT: v_and_b32_e32 v3, s8, v3 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 +; CI-NEXT: v_and_b32_e32 v3, s0, v3 ; CI-NEXT: v_lshr_b32_e32 v2, v2, v3 ; CI-NEXT: v_lshr_b32_e32 v3, v4, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 @@ -171,39 +171,39 @@ define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] +; VI-NEXT: flat_load_dword v3, v[0:1] ; VI-NEXT: s_lshr_b32 s1, s0, 16 -; VI-NEXT: v_mov_b32_e32 v4, s1 -; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b16_e32 v1, s0, v0 -; VI-NEXT: v_lshrrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshrrev_b16_e32 v4, s0, v3 +; VI-NEXT: v_lshrrev_b16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v2, v4, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: 
s_endpgm ; ; CI-LABEL: lshr_v_s_v2i16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; CI-NEXT: s_load_dword s0, s[0:1], 0xd -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_load_dword s8, s[0:1], 0xd ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshr_b32 s9, s0, 16 -; CI-NEXT: s_and_b32 s10, s0, s8 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 +; CI-NEXT: s_mov_b32 s0, 0xffff +; CI-NEXT: s_lshr_b32 s1, s8, 16 +; CI-NEXT: s_and_b32 s8, s8, s0 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 -; CI-NEXT: v_lshrrev_b32_e32 v3, s9, v3 -; CI-NEXT: v_lshrrev_b32_e32 v2, s10, v2 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 +; CI-NEXT: v_lshrrev_b32_e32 v3, s1, v3 +; CI-NEXT: v_lshrrev_b32_e32 v2, s8, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 @@ -246,39 +246,39 @@ define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] +; VI-NEXT: flat_load_dword v3, v[0:1] ; VI-NEXT: s_lshr_b32 s1, s0, 16 -; VI-NEXT: v_mov_b32_e32 v4, s1 -; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b16_e64 v1, v0, s0 -; VI-NEXT: v_lshrrev_b16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: 
flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshrrev_b16_e64 v4, v3, s0 +; VI-NEXT: v_lshrrev_b16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v4, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: lshr_s_v_v2i16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; CI-NEXT: s_load_dword s0, s[0:1], 0xd -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_load_dword s8, s[0:1], 0xd ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshr_b32 s9, s0, 16 -; CI-NEXT: s_and_b32 s10, s0, s8 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 +; CI-NEXT: s_mov_b32 s0, 0xffff +; CI-NEXT: s_lshr_b32 s1, s8, 16 +; CI-NEXT: s_and_b32 s8, s8, s0 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 -; CI-NEXT: v_lshr_b32_e32 v3, s9, v3 -; CI-NEXT: v_lshr_b32_e32 v2, s10, v2 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 +; CI-NEXT: v_lshr_b32_e32 v3, s1, v3 +; CI-NEXT: v_lshr_b32_e32 v2, s8, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 @@ -320,15 +320,15 @@ define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b16_e64 v1, v0, 8 -; 
VI-NEXT: v_lshrrev_b16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshrrev_b16_e64 v2, v3, 8 +; VI-NEXT: v_lshrrev_b16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: lshr_imm_v_v2i16: @@ -428,45 +428,45 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ; GFX9-LABEL: v_lshr_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:8 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s0, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, v5 -; GFX9-NEXT: v_pk_lshrrev_b16 v0, v0, v4 -; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, v3 +; GFX9-NEXT: v_pk_lshrrev_b16 v0, v0, v2 +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_lshr_v4i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; VI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v0 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b16_e32 v6, v5, v1 -; VI-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_lshrrev_b16_e32 v5, v4, v0 -; VI-NEXT: v_lshrrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshrrev_b16_e32 v6, v3, v1 +; VI-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshrrev_b16_e32 v3, v2, v0 +; VI-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NEXT: v_or_b32_e32 v1, v6, v1 -; VI-NEXT: v_or_b32_e32 v0, v5, v0 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: v_or_b32_e32 v0, v3, v0 +; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; VI-NEXT: s_endpgm ; ; CI-LABEL: v_lshr_v4i16: @@ -480,7 +480,7 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_mov_b32 s0, 0xffff ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v2 @@ -488,10 
+488,10 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v5 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 -; CI-NEXT: v_and_b32_e32 v4, s8, v4 -; CI-NEXT: v_and_b32_e32 v3, s8, v3 -; CI-NEXT: v_and_b32_e32 v5, s8, v5 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 +; CI-NEXT: v_and_b32_e32 v4, s0, v4 +; CI-NEXT: v_and_b32_e32 v3, s0, v3 +; CI-NEXT: v_and_b32_e32 v5, s0, v5 ; CI-NEXT: v_lshr_b32_e32 v3, v3, v5 ; CI-NEXT: v_lshr_b32_e32 v5, v7, v9 ; CI-NEXT: v_lshr_b32_e32 v2, v2, v4 @@ -565,13 +565,13 @@ define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 -; CI-NEXT: s_mov_b32 s8, 0xff00ff +; CI-NEXT: s_mov_b32 s0, 0xff00ff ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 8, v3 ; CI-NEXT: v_lshrrev_b32_e32 v2, 8, v2 -; CI-NEXT: v_and_b32_e32 v3, s8, v3 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 +; CI-NEXT: v_and_b32_e32 v3, s0, v3 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir b/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir index a394d344cdd65..4ed0e400b7a75 100644 --- a/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir +++ b/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir @@ -2,7 +2,7 @@ # GCN-LABEL: name: cluster_add_addc # GCN: S_NOP 0, implicit-def $vcc -# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec # GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %3, 0, implicit $exec name: 
cluster_add_addc registers: @@ -19,7 +19,7 @@ body: | bb.0: %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec - %2, %3 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2, %3 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc @@ -27,9 +27,9 @@ body: | ... # GCN-LABEL: name: interleave_add64s -# GCN: dead %8:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %8:vgpr_32, %9:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec # GCN-NEXT: dead %12:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %4, %5, %9, 0, implicit $exec -# GCN-NEXT: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_I32_e64 %2, %3, 0, implicit $exec +# GCN-NEXT: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_CO_U32_e64 %2, %3, 0, implicit $exec # GCN-NEXT: dead %14:vgpr_32, dead %15:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %11, 0, implicit $exec name: interleave_add64s registers: @@ -61,8 +61,8 @@ body: | %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec - %8, %9 = V_ADD_I32_e64 %0, %1, 0, implicit $exec - %10, %11 = V_ADD_I32_e64 %2, %3, 0, implicit $exec + %8, %9 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec + %10, %11 = V_ADD_CO_U32_e64 %2, %3, 0, implicit $exec %12, %13 = V_ADDC_U32_e64 %4, %5, %9, 0, implicit $exec @@ -93,7 +93,7 @@ body: | ... 
# GCN-LABEL: name: no_cluster_add_addc_diff_sgpr -# GCN: dead %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec # GCN-NEXT: %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: S_NOP 0, implicit-def $vcc @@ -115,7 +115,7 @@ body: | %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec %8 = S_MOV_B64 0 - %2, %3 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2, %3 = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc @@ -123,7 +123,7 @@ body: | ... # GCN-LABEL: name: cluster_sub_subb # GCN: S_NOP 0, implicit-def $vcc -# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUB_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUB_CO_U32_e64 %0, %1, 0, implicit $exec # GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_SUBB_U32_e64 %6, %7, %3, 0, implicit $exec name: cluster_sub_subb registers: @@ -140,7 +140,7 @@ body: | bb.0: %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec - %2, %3 = V_SUB_I32_e64 %0, %1, 0, implicit $exec + %2, %3 = V_SUB_CO_U32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc @@ -149,7 +149,7 @@ body: | # GCN-LABEL: name: cluster_subrev_subbrev # GCN: S_NOP 0, implicit-def $vcc -# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUBREV_CO_U32_e64 %0, %1, 0, implicit $exec # GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_SUBBREV_U32_e64 %6, %7, %3, 0, implicit $exec name: cluster_subrev_subbrev registers: @@ -166,7 +166,7 @@ body: | bb.0: %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec - %2, %3 = V_SUBREV_I32_e64 %0, %1, 0, implicit 
$exec + %2, %3 = V_SUBREV_CO_U32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll index c92244b4f9903..d7134729c149f 100644 --- a/llvm/test/CodeGen/AMDGPU/madak.ll +++ b/llvm/test/CodeGen/AMDGPU/madak.ll @@ -39,7 +39,8 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add ; it. ; GCN-LABEL: {{^}}madak_2_use_f32: -; GFX8_9_10: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GFX9: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GFX10: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 ; GFX6-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GFX6-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GFX6-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 @@ -47,6 +48,7 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add ; GFX8_9_10: {{flat|global}}_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} ; GFX8_9_10: {{flat|global}}_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} ; GFX6-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GFX8-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 ; GFX6_8_9-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 ; GFX10-MAD-DAG:v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 ; FMA-DAG: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 diff --git a/llvm/test/CodeGen/AMDGPU/max.i16.ll b/llvm/test/CodeGen/AMDGPU/max.i16.ll index 7f7f2bae57aea..b77bb5bb14b57 100644 --- a/llvm/test/CodeGen/AMDGPU/max.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/max.i16.ll @@ -73,16 +73,16 @@ define amdgpu_kernel void @v_test_imax_sge_v2i16(<2 x i16> addrspace(1)* %out, < ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; 
VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v5, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_max_i16_e32 v2, v0, v1 -; VI-NEXT: v_max_i16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: flat_store_dword v[4:5], v0 +; VI-NEXT: v_max_i16_e32 v3, v5, v2 +; VI-NEXT: v_max_i16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_imax_sge_v2i16: @@ -124,63 +124,64 @@ define amdgpu_kernel void @v_test_imax_sge_v3i16(<3 x i16> addrspace(1)* %out, < ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; VI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 +; VI-NEXT: v_lshlrev_b32_e32 v6, 3, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v4 +; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v6 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; VI-NEXT: v_add_u32_e32 v6, vcc, 4, v0 -; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc -; VI-NEXT: flat_load_ushort v6, v[6:7] -; VI-NEXT: flat_load_dword v7, v[0:1] +; VI-NEXT: v_add_u32_e32 v4, vcc, 4, v0 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-NEXT: flat_load_ushort v4, 
v[4:5] +; VI-NEXT: flat_load_dword v5, v[0:1] ; VI-NEXT: v_add_u32_e32 v0, vcc, 4, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc -; VI-NEXT: flat_load_ushort v0, v[0:1] -; VI-NEXT: flat_load_dword v8, v[2:3] -; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v4 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v7, v[2:3] +; VI-NEXT: flat_load_ushort v8, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v6 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_max_i16_e32 v0, v6, v0 +; VI-NEXT: v_max_i16_e32 v6, v5, v7 +; VI-NEXT: v_max_i16_sdwa v5, v5, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_max_i16_e32 v1, v7, v8 -; VI-NEXT: v_max_i16_sdwa v7, v7, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v1, v1, v7 -; VI-NEXT: flat_store_short v[2:3], v0 -; VI-NEXT: flat_store_dword v[4:5], v1 +; VI-NEXT: v_max_i16_e32 v4, v4, v8 +; VI-NEXT: v_or_b32_e32 v5, v6, v5 +; VI-NEXT: flat_store_short v[2:3], v4 +; VI-NEXT: flat_store_dword v[0:1], v5 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_imax_sge_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v5, 3, v0 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v4 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v3, 
vcc, 0, v3, vcc -; GFX9-NEXT: global_load_short_d16 v7, v[0:1], off offset:4 -; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: global_load_short_d16 v6, v[2:3], off offset:4 -; GFX9-NEXT: global_load_dword v1, v[2:3], off -; GFX9-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s4, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: global_load_short_d16 v6, v[0:1], off offset:4 +; GFX9-NEXT: global_load_dword v7, v[0:1], off +; GFX9-NEXT: global_load_short_d16 v4, v[2:3], off offset:4 +; GFX9-NEXT: global_load_dword v2, v[2:3], off +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_pk_max_i16 v3, v6, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_max_i16 v0, v0, v1 -; GFX9-NEXT: v_pk_max_i16 v1, v7, v6 -; GFX9-NEXT: global_store_short v[4:5], v1, off offset:4 -; GFX9-NEXT: global_store_dword v[4:5], v0, off +; GFX9-NEXT: v_pk_max_i16 v2, v7, v2 +; GFX9-NEXT: global_store_short v[0:1], v3, off offset:4 +; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %aptr, i32 %tid @@ -441,16 +442,16 @@ define amdgpu_kernel void @v_test_umax_ugt_v2i16(<2 x i16> addrspace(1)* %out, < ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v4 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_add_u32_e32 v4, vcc, s4, v4 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v5, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_max_u16_e32 
v2, v0, v1 -; VI-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: flat_store_dword v[4:5], v0 +; VI-NEXT: v_max_u16_e32 v3, v5, v2 +; VI-NEXT: v_max_u16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_umax_ugt_v2i16: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir index 99348a57b9f6a..a9545e6641587 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -54,7 +54,7 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc - $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec + $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir index f52275af48c9e..6a037a77784e4 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir +++ 
b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir @@ -148,7 +148,7 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc - $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec + $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir index c543b80454b62..0dfa137999f3a 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir @@ -128,7 +128,7 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc - $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec + $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll index bc4c0d03db932..a5baa34ea3c79 100644 --- 
a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -51,38 +51,38 @@ bb: define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) { ; GCN-LABEL: scalar_clause: ; GCN: ; %bb.0: ; %bb -; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 -; GCN-NEXT: s_load_dwordx2 s[18:19], s[0:1], 0x2c -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_load_dwordx4 s[0:3], s[16:17], 0x0 -; GCN-NEXT: s_load_dwordx4 s[4:7], s[16:17], 0x10 -; GCN-NEXT: s_load_dwordx4 s[8:11], s[16:17], 0x20 -; GCN-NEXT: s_load_dwordx4 s[12:15], s[16:17], 0x30 -; GCN-NEXT: v_mov_b32_e32 v16, s18 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_mov_b32_e32 v8, s8 -; GCN-NEXT: v_mov_b32_e32 v12, s12 -; GCN-NEXT: v_mov_b32_e32 v17, s19 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: v_mov_b32_e32 v3, s3 -; GCN-NEXT: v_mov_b32_e32 v5, s5 -; GCN-NEXT: v_mov_b32_e32 v6, s6 -; GCN-NEXT: v_mov_b32_e32 v7, s7 -; GCN-NEXT: v_mov_b32_e32 v9, s9 -; GCN-NEXT: v_mov_b32_e32 v10, s10 -; GCN-NEXT: v_mov_b32_e32 v11, s11 -; GCN-NEXT: v_mov_b32_e32 v13, s13 -; GCN-NEXT: v_mov_b32_e32 v14, s14 -; GCN-NEXT: v_mov_b32_e32 v15, s15 -; GCN-NEXT: global_store_dwordx4 v[16:17], v[0:3], off -; GCN-NEXT: global_store_dwordx4 v[16:17], v[4:7], off offset:16 -; GCN-NEXT: global_store_dwordx4 v[16:17], v[8:11], off offset:32 -; GCN-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:48 -; GCN-NEXT: s_endpgm +; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 +; GCN-NEXT: s_load_dwordx2 s[18:19], s[0:1], 0x2c +; GCN-NEXT: s_nop 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_load_dwordx4 s[0:3], s[16:17], 0x0 +; GCN-NEXT: s_load_dwordx4 s[4:7], s[16:17], 0x10 +; GCN-NEXT: s_load_dwordx4 s[8:11], s[16:17], 0x20 +; GCN-NEXT: s_load_dwordx4 s[12:15], s[16:17], 0x30 +; GCN-NEXT: 
v_mov_b32_e32 v12, s18 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v4, s4 +; GCN-NEXT: v_mov_b32_e32 v8, s8 +; GCN-NEXT: v_mov_b32_e32 v13, s19 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: v_mov_b32_e32 v5, s5 +; GCN-NEXT: v_mov_b32_e32 v6, s6 +; GCN-NEXT: v_mov_b32_e32 v7, s7 +; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off +; GCN-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: v_mov_b32_e32 v9, s9 +; GCN-NEXT: v_mov_b32_e32 v10, s10 +; GCN-NEXT: v_mov_b32_e32 v11, s11 +; GCN-NEXT: v_mov_b32_e32 v1, s13 +; GCN-NEXT: v_mov_b32_e32 v2, s14 +; GCN-NEXT: v_mov_b32_e32 v3, s15 +; GCN-NEXT: global_store_dwordx4 v[12:13], v[8:11], off offset:32 +; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off offset:48 +; GCN-NEXT: s_endpgm bb: %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %arg, align 16 %tmp2 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 1 @@ -108,46 +108,56 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar ; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 4, v2 ; GCN-NEXT: v_add_u32_e32 v0, v0, v2 +; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:4 +; GCN-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:8 +; GCN-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:12 +; GCN-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:16 +; GCN-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:20 +; GCN-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:24 +; GCN-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:28 +; GCN-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:32 +; GCN-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:36 +; GCN-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:40 +; 
GCN-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:44 +; GCN-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:48 +; GCN-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:52 +; GCN-NEXT: buffer_load_dword v17, v0, s[0:3], 0 offen offset:56 ; GCN-NEXT: v_add_u32_e32 v1, v1, v2 -; GCN-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:20 -; GCN-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:24 -; GCN-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:28 -; GCN-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:32 -; GCN-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:36 -; GCN-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:40 -; GCN-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:44 -; GCN-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:48 -; GCN-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:52 -; GCN-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:56 -; GCN-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:60 -; GCN-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 -; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 -; GCN-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:16 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60 ; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_waitcnt vmcnt(4) -; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen -; GCN-NEXT: s_waitcnt vmcnt(4) -; GCN-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 -; GCN-NEXT: s_waitcnt vmcnt(4) -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 -; GCN-NEXT: s_waitcnt vmcnt(4) -; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 -; GCN-NEXT: s_waitcnt vmcnt(4) -; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:16 -; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:20 -; GCN-NEXT: 
buffer_store_dword v7, v1, s[0:3], 0 offen offset:24 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:28 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:32 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:36 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:40 -; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:44 -; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:48 -; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:52 -; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:56 -; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:60 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:4 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:8 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:12 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:16 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:20 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:24 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:28 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:32 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:36 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:40 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:44 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:48 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword 
v16, v1, s[0:3], 0 offen offset:52 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen offset:56 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] bb: diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir index 5e13ed178aaa0..deee04f7ce213 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir @@ -8,9 +8,9 @@ # GCN-LABEL: name: ds_combine_base_offset{{$}} -# VI: V_ADD_I32_e64 %6, %0, +# VI: V_ADD_CO_U32_e64 %6, %0, # VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8, -# VI: V_ADD_I32_e64 %10, %3, +# VI: V_ADD_CO_U32_e64 %10, %3, # VI-NEXT: DS_READ2_B32 killed %11, 0, 8, # GFX9: V_ADD_U32_e64 %6, %0, @@ -91,9 +91,9 @@ body: | # GCN-LABEL: name: ds_combine_base_offset_subreg{{$}} -# VI: V_ADD_I32_e64 %6, %0.sub0, +# VI: V_ADD_CO_U32_e64 %6, %0.sub0, # VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8, -# VI: V_ADD_I32_e64 %10, %3.sub0, +# VI: V_ADD_CO_U32_e64 %10, %3.sub0, # VI-NEXT: DS_READ2_B32 killed %11, 0, 8, # GFX9: V_ADD_U32_e64 %6, %0.sub0, diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir index dcc58ba956e30..743594b91bbe2 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir @@ -107,7 +107,7 @@ body: | %6:vreg_64 = DS_READ2_B32 %1, 16, 17, 0, implicit $m0, implicit $exec :: (load 8 from %ir.ptr.64, align 4) %3:vgpr_32 = COPY %6.sub0 %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.4) - %5:vgpr_32 = V_ADD_I32_e32 killed %3, killed %4, implicit-def $vcc, implicit $exec + %5:vgpr_32 = V_ADD_CO_U32_e32 killed %3, killed %4, implicit-def $vcc, implicit $exec DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed $m0, implicit $exec :: 
(store 4 into %ir.ptr.0) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir index be3e6284b103b..8efdccadf6273 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -223,7 +223,7 @@ body: | # ADDR64: %16:sgpr_32 = S_MOV_B32 0 # ADDR64: %17:sgpr_32 = S_MOV_B32 61440 # ADDR64: %18:sgpr_128 = REG_SEQUENCE %15, %subreg.sub0_sub1, %16, %subreg.sub2, %17, %subreg.sub3 -# ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_I32_e64 %14.sub0, %4.sub0, 0, implicit $exec +# ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_CO_U32_e64 %14.sub0, %4.sub0, 0, implicit $exec # ADDR64: %10:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %14.sub1, %4.sub1, killed %12, 0, implicit $exec # ADDR64: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1 # ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 7328529596a22..cb619955f4d13 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -57,7 +57,7 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3 ; GFX9-NEXT: v_and_b32_e32 v5, 1, v18 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 ; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 -; GFX9-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] +; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] ; GFX9-NEXT: s_cbranch_execz BB1_3 ; GFX9-NEXT: ; %bb.1: ; %bb19 ; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v6 @@ -67,7 +67,7 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v7 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 2, v2 ; GFX9-NEXT: v_add_u32_e32 v7, v17, v12 -; GFX9-NEXT: s_mov_b64 s[12:13], 0 +; GFX9-NEXT: s_mov_b64 s[10:11], 0 ; GFX9-NEXT: 
BB1_2: ; %bb23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v0 @@ -76,32 +76,32 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 ; GFX9-NEXT: v_madak_f32 v8, v8, v4, 0x3727c5ac ; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 ; GFX9-NEXT: v_mul_u32_u24_e32 v18, v8, v5 ; GFX9-NEXT: v_add_u32_e32 v8, v8, v16 -; GFX9-NEXT: v_cmp_lt_u32_e64 s[6:7], v8, v13 +; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v8, v13 ; GFX9-NEXT: v_mul_lo_u32 v8, v8, v15 ; GFX9-NEXT: v_sub_u32_e32 v19, v9, v18 -; GFX9-NEXT: v_cmp_lt_u32_e64 s[8:9], v19, v14 -; GFX9-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9] +; GFX9-NEXT: v_cmp_lt_u32_e64 s[6:7], v19, v14 +; GFX9-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-NEXT: v_sub_u32_e32 v12, v12, v18 -; GFX9-NEXT: s_and_b64 s[6:7], s[6:7], vcc ; GFX9-NEXT: v_add_u32_e32 v8, v12, v8 +; GFX9-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] ; GFX9-NEXT: v_lshlrev_b64 v[8:9], 2, v[8:9] -; GFX9-NEXT: s_or_b64 s[12:13], s[4:5], s[12:13] -; GFX9-NEXT: v_add_co_u32_e64 v8, s[4:5], v10, v8 -; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], v11, v9, s[4:5] +; GFX9-NEXT: v_add_co_u32_e64 v8, s[6:7], v10, v8 +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[6:7], v11, v9, s[6:7] ; GFX9-NEXT: global_load_dword v8, v[8:9], off +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v1 +; GFX9-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5] ; GFX9-NEXT: ds_write_b32 v3, v8 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v6 -; GFX9-NEXT: s_andn2_b64 exec, exec, s[12:13] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[10:11] ; GFX9-NEXT: s_cbranch_execnz BB1_2 ; GFX9-NEXT: BB1_3: ; %Flow3 -; GFX9-NEXT: s_or_b64 exec, exec, s[10:11] +; GFX9-NEXT: 
s_or_b64 exec, exec, s[8:9] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] bb: diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index 144b3f2599bf0..147d406a14f1a 100644 --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -367,7 +367,6 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; GCN: v_cmp_ne_u32_e32 vcc, 7, v0 ; GCN: {{^}}[[FLOW]]: -; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]] ; GCN: s_or_b64 exec, exec ; GCN: v_mov_b32_e32 v0, 2.0 diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll index f7538c081e6d4..1b36d3003eb74 100644 --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -103,9 +103,9 @@ define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) { ; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -483,10 +483,10 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) { ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -510,10 +510,10 
@@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) { ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -537,10 +537,10 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) { ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -591,10 +591,10 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) { ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -773,9 +773,9 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; 
GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_byte v[0:1], v0 ; GFX9-NEXT: s_endpgm @@ -1269,10 +1269,11 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_byte v[0:1], v0 ; GFX9-NEXT: s_endpgm @@ -1303,10 +1304,11 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_byte v[0:1], v0 ; GFX9-NEXT: s_endpgm @@ -1337,10 +1339,11 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 ; GFX9-NEXT: v_addc_co_u32_e32 
v1, vcc, v1, v2, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_byte v[0:1], v0 ; GFX9-NEXT: s_endpgm @@ -1408,9 +1411,9 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_byte v[0:1], v0 ; GFX9-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll index add4e687926b1..731a95b2a3e9b 100644 --- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -471,10 +471,10 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -482,10 +482,10 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: ; implicit-def: 
$vcc_hi ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761 @@ -498,10 +498,10 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -509,10 +509,10 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760 @@ -525,10 +525,10 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -552,10 +552,10 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1213,10 +1213,11 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_byte v[0:1], v0, off ; GFX9-NEXT: s_endpgm @@ -1227,9 +1228,9 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, s0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 
offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm @@ -1246,10 +1247,11 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_byte v[0:1], v0, off ; GFX9-NEXT: s_endpgm @@ -1260,9 +1262,9 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm @@ -1279,10 +1281,11 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: 
global_store_byte v[0:1], v0, off ; GFX9-NEXT: s_endpgm @@ -1314,9 +1317,9 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_byte v[0:1], v0, off ; GFX9-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir index 4a69057b1f10a..e5a7421dbd5de 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir @@ -104,7 +104,7 @@ body: | ; GCN-LABEL: name: cndmask_cmp_cbranch_fold_undef ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) - ; GCN: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def $scc + ; GCN: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc ; GCN: S_CBRANCH_VCCZ %bb.1, implicit $vcc ; GCN: bb.1: bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir index 91bb625ddad7c..7da54744b8c43 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir @@ -2,7 +2,7 @@ # GCN: name: negated_cond_vop2 # GCN: %0:sgpr_32 = IMPLICIT_DEF -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2 @@ -24,7 +24,7 @@ body: | # GCN: name: negated_cond_vop3 # GCN: %0:sgpr_32 
= IMPLICIT_DEF -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3 @@ -116,7 +116,7 @@ body: | # GCN: name: negated_cond_vop3_imp_vcc # GCN: $vcc_lo = IMPLICIT_DEF -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3_imp_vcc @@ -138,7 +138,7 @@ body: | # GCN: name: negated_cond_vop2_imp_vcc # GCN: $vcc_lo = IMPLICIT_DEF -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2_imp_vcc @@ -187,7 +187,7 @@ body: | # GCN: name: negated_cond_vop2_used_sel # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2_used_sel @@ -213,7 +213,7 @@ body: | # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec # GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2_used_vcc @@ -289,7 +289,7 @@ body: | # GCN: name: negated_cond_vop3_sel_right_subreg1 # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, 
implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3_sel_right_subreg1 @@ -313,7 +313,7 @@ body: | # GCN: name: negated_cond_vop3_sel_right_subreg2 # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF -# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def $scc +# GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3_sel_right_subreg2 diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir index c70474bf8c390..24e1ec81cb3ad 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir @@ -2,7 +2,7 @@ # GCN: name: negated_cond_vop2 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2 @@ -24,7 +24,7 @@ body: | # GCN: name: negated_cond_vop3 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3 @@ -142,7 +142,7 @@ body: | # GCN: name: negated_cond_vop3_imp_vcc # GCN: $vcc = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3_imp_vcc @@ -164,7 +164,7 @@ body: | # GCN: name: negated_cond_vop2_imp_vcc # GCN: $vcc = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, $vcc, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit 
$vcc --- name: negated_cond_vop2_imp_vcc @@ -213,7 +213,7 @@ body: | # GCN: name: negated_cond_vop2_used_sel # GCN: %0:sreg_64_xexec = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2_used_sel @@ -239,7 +239,7 @@ body: | # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec # GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop2_used_vcc @@ -315,7 +315,7 @@ body: | # GCN: name: negated_cond_vop3_sel_right_subreg1 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3_sel_right_subreg1 @@ -339,7 +339,7 @@ body: | # GCN: name: negated_cond_vop3_sel_right_subreg2 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_vop3_sel_right_subreg2 @@ -388,7 +388,7 @@ body: | # GCN: name: negated_cond_vop2_dominated_blocks # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc +# GCN: $vcc = S_ANDN2_B64 $exec, %0, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc --- name: negated_cond_vop2_dominated_blocks @@ -466,7 +466,7 @@ body: | # GCN: name: 
negated_cond_subreg # GCN: %0.sub0_sub1:sgpr_128 = IMPLICIT_DEF -# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0.sub0_sub1, implicit-def $scc +# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0.sub0_sub1, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_subreg diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index cbb5fa2b68e00..a591713b0b4f3 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -117,7 +117,7 @@ body: | ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $sgpr28 = S_MOV_B32 8192 - ; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_I32_e64 killed $sgpr28, killed $vgpr2, 0, implicit $exec + ; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_CO_U32_e64 killed $sgpr28, killed $vgpr2, 0, implicit $exec ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit 
$sgpr26, implicit $sgpr17, implicit $sgpr31 ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 @@ -156,7 +156,7 @@ body: | ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 ; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $vcc_lo = S_MOV_B32 8192 - ; CHECK: $vgpr2, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec + ; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index 579ba6dfc3f93..96cd14e947e27 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -32,7 +32,7 
@@ body: | ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8: $vcc_lo = S_MOV_B32 8192 - ; GFX8: $vgpr3, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec + ; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec ; GFX8: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec ; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc ; GFX8: $sgpr33 = V_READLANE_B32_vi $vgpr2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll index a8990be7f19e1..1fef1423ac4f4 100644 --- a/llvm/test/CodeGen/AMDGPU/perfhint.ll +++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_membound: -; MemoryBound: 1 -; WaveLimiterHint : 1 +; GCN: MemoryBound: 1 +; GCN: WaveLimiterHint : 1 define amdgpu_kernel void @test_membound(<4 x i32> addrspace(1)* nocapture readonly %arg, <4 x i32> addrspace(1)* nocapture %arg1) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -30,28 +30,31 @@ bb: } ; GCN-LABEL: {{^}}test_large_stride: -; MemoryBound: 0 -; WaveLimiterHint : 1 +; GCN: MemoryBound: 0 +; GCN: WaveLimiterHint : 1 define amdgpu_kernel void @test_large_stride(i32 addrspace(1)* nocapture %arg) { bb: %tmp = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4096 %tmp1 = load i32, i32 addrspace(1)* %tmp, align 4 + %mul1 = mul i32 %tmp1, %tmp1 %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - store i32 %tmp1, i32 addrspace(1)* %tmp2, align 4 + store i32 %mul1, i32 addrspace(1)* %tmp2, align 4 %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8192 %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4 + %mul4 = mul i32 %tmp4, %tmp4 %tmp5 = getelementptr inbounds 
i32, i32 addrspace(1)* %arg, i64 2 - store i32 %tmp4, i32 addrspace(1)* %tmp5, align 4 + store i32 %mul4, i32 addrspace(1)* %tmp5, align 4 %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12288 %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 + %mul7 = mul i32 %tmp7, %tmp7 %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3 - store i32 %tmp7, i32 addrspace(1)* %tmp8, align 4 + store i32 %mul7, i32 addrspace(1)* %tmp8, align 4 ret void } ; GCN-LABEL: {{^}}test_indirect: -; MemoryBound: 0 -; WaveLimiterHint : 1 +; GCN: MemoryBound: 0 +; GCN: WaveLimiterHint : 1 define amdgpu_kernel void @test_indirect(i32 addrspace(1)* nocapture %arg) { bb: %tmp = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir index 9059c8edf3e48..1a2d187bed825 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir @@ -37,7 +37,7 @@ body: | %24:sreg_64 = PHI %20, %bb.3, %22, %bb.0 %23:vgpr_32 = PHI %19, %bb.3, %18, %bb.0 SI_END_CF %24, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - %3:vgpr_32, dead %10:sreg_64 = nsw V_ADD_I32_e64 1, %23, 0, implicit $exec + %3:vgpr_32, dead %10:sreg_64 = nsw V_ADD_CO_U32_e64 1, %23, 0, implicit $exec bb.3: successors: %bb.3(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir index c4fd98098032b..cde23cb76089f 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir @@ -23,20 +23,20 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_32_xm0_xexec = 
V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 4096 - %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6144 - %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec @@ -45,15 +45,15 @@ body: | # GFX10-LABEL: name: LowestInMiddle # GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400 -# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] -# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]] +# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] +# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = 
V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]] # GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 # GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0, 0 # GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0, # # GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200 -# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] -# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]] +# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] +# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]] # GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1 # GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0, @@ -76,25 +76,25 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, 
implicit $exec %25:sgpr_32 = S_MOV_B32 8000 - %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6400 - %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 11200 - %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec @@ -103,14 +103,14 @@ body: | # GFX10-LABEL: name: NegativeDistance # GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 -# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] -# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]] +# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] +# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]] # GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 # GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0 # GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0 # GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240 -# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] -# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]] +# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] +# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]] # GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1 # GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0 @@ -133,25 +133,25 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 6144 - %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit 
$exec + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 8192 - %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 10240 - %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec @@ -178,16 +178,16 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = 
S_MOV_B32 6144 - %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec @@ -205,13 +205,13 @@ body: | %0:vreg_64 = COPY $vgpr0_vgpr1 %1:sgpr_32 = S_MOV_B32 4000 - %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_I32_e64 %0.sub0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec %4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1 GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, implicit $exec %8:sgpr_32 = S_MOV_B32 3000 - %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_I32_e64 %0.sub0, %8, 0, implicit $exec + %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec %11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1 GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index 4c6fa2f0f4c8c..a9a60b93ef54d 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -17,8 +17,8 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)* %buffer) { ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 
off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 @@ -86,28 +86,29 @@ define hidden amdgpu_kernel void @clmem_read(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off ; -; GFX10: 
global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -299,9 +300,9 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off 
offset:-2048 @@ -455,11 +456,11 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) { ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} @@ -518,11 +519,11 @@ define hidden amdgpu_kernel void @negativeoffset(i8 addrspace(1)* nocapture %buf ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; -; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) #2 %conv = and i64 %call, 255 diff --git 
a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir index aa4bdfe238d68..ec1095040898a 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir @@ -23,20 +23,20 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 4096 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6144 - %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec @@ -45,14 +45,14 @@ body: | # GFX9-LABEL: name: LowestInMiddle # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 
= S_MOV_B32 11200 -# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] -# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]] +# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] +# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]] # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0, 0 # # GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400 -# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] -# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]] +# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] +# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]] # GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0, # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0, @@ -76,25 +76,25 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead 
%18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 8000 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6400 - %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 11200 - %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec @@ -103,9 +103,9 @@ body: | # GFX9-LABEL: name: NegativeDistance # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240 -# GFX9: [[V_ADD_I32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] -# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead 
%{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]] -# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 +# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] +# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]] +# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0, 0 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0 @@ -129,25 +129,25 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 6144 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = 
REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 8192 - %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 10240 - %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec @@ -174,16 +174,16 @@ body: | %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 6144 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed 
%27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec @@ -201,13 +201,13 @@ body: | %0:vreg_64 = COPY $vgpr0_vgpr1 %1:sgpr_32 = S_MOV_B32 4000 - %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0.sub0, %1, 0, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1 GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, implicit $exec %8:sgpr_32 = S_MOV_B32 3000 - %9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 %0.sub0, %8, 0, implicit $exec + %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1 GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir index e4e33026da4b0..93129f20d5a9d 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -196,7 +196,7 @@ body: | successors: %bb.30(0x30000000), %bb.36(0x50000000) %53 = COPY killed %62 - %47 = V_ADD_I32_e32 -1, %46, implicit-def dead $vcc, implicit $exec + %47 = V_ADD_CO_U32_e32 -1, %46, implicit-def dead $vcc, implicit $exec %48 = V_OR_B32_e32 killed %47, %26, implicit $exec %49 = COPY %53 %49.sub2 = COPY undef %48 diff --git a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir index 42f34646f6976..3b4c0a4ef28f8 100644 --- a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir +++ b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir @@ -18,12 +18,12 @@ body: | ; GCN: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: 
[[COPY6:%[0-9]+]]:sgpr_32 = COPY [[COPY3]] ; GCN: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY4]], implicit $exec - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 killed [[V_MUL_LO_U32_]], [[COPY6]], 0, implicit $exec + ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 killed [[V_MUL_LO_U32_]], [[COPY6]], 0, implicit $exec ; GCN: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY4]], [[COPY5]] ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167 ; GCN: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY3]], implicit $exec ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]] - ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_1]], [[COPY7]], [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_1]], [[COPY7]], [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY4]], [[V_ADDC_U32_e64_]], implicit $exec ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736 ; GCN: [[V_MUL_LO_U32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_MUL_HI_U32_]], [[S_MOV_B32_1]], implicit $exec @@ -38,7 +38,7 @@ body: | %5:sreg_32 = COPY $sgpr2 %20:vgpr_32 = COPY %3 %7:sreg_32 = S_MUL_I32 %6, %4 - %9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 killed %7, %20, 0, implicit $exec + %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 killed %7, %20, 0, implicit $exec %8:sreg_32 = S_MUL_HI_U32 %4, %5 %11:sreg_32 = S_MOV_B32 -614296167 %12:sreg_32 = S_MUL_I32 %6, %3 diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll index f0a7a80670813..fe9e6275e0d58 100644 --- a/llvm/test/CodeGen/AMDGPU/saddo.ll +++ b/llvm/test/CodeGen/AMDGPU/saddo.ll @@ -166,20 +166,18 @@ define 
amdgpu_kernel void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* ; SI-NEXT: s_mov_b32 s14, s10 ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s0 -; SI-NEXT: s_mov_b32 s9, s1 -; SI-NEXT: s_mov_b32 s12, s2 -; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s2, s10 -; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_mov_b32 s12, s4 +; SI-NEXT: s_mov_b32 s13, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 ; SI-NEXT: s_mov_b32 s6, s10 ; SI-NEXT: s_mov_b32 s7, s11 -; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_add_i32_e32 v2, vcc, v1, v0 ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 @@ -187,19 +185,19 @@ define amdgpu_kernel void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* ; SI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 -; SI-NEXT: buffer_store_byte v0, off, s[12:15], 0 +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: v_saddo_i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v4, s4 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v6, s6 -; VI-NEXT: v_mov_b32_e32 v7, s7 -; VI-NEXT: flat_load_dword v4, v[4:5] -; VI-NEXT: flat_load_dword v5, v[6:7] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: flat_load_dword v4, v[0:1] +; VI-NEXT: flat_load_dword v5, v[2:3] ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 @@ -218,12 +216,12 @@ define amdgpu_kernel void 
@v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-NEXT: v_mov_b32_e32 v7, s7 -; GFX9-NEXT: global_load_dword v4, v[4:5], off -; GFX9-NEXT: global_load_dword v5, v[6:7], off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v5, v[2:3], off ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 @@ -335,20 +333,18 @@ define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* ; SI-NEXT: s_mov_b32 s14, s10 ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s0 -; SI-NEXT: s_mov_b32 s9, s1 -; SI-NEXT: s_mov_b32 s12, s2 -; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s2, s10 -; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_mov_b32 s12, s4 +; SI-NEXT: s_mov_b32 s13, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 ; SI-NEXT: s_mov_b32 s6, s10 ; SI-NEXT: s_mov_b32 s7, s11 -; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 ; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_add_i32_e32 v4, vcc, v0, v2 ; SI-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc @@ -357,57 +353,57 @@ define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* ; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[8:11], 0 ; SI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; SI-NEXT: 
buffer_store_byte v0, off, s[12:15], 0 +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: v_saddo_i64: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v4, s4 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v6, s6 -; VI-NEXT: v_mov_b32_e32 v7, s7 -; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] -; VI-NEXT: flat_load_dwordx2 v[6:7], v[6:7] -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v3, s3 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v6, s2 +; VI-NEXT: v_mov_b32_e32 v7, s3 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v6 -; VI-NEXT: v_addc_u32_e32 v9, vcc, v5, v7, vcc -; VI-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[6:7] -; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], v[8:9], v[4:5] -; VI-NEXT: flat_store_dwordx2 v[0:1], v[8:9] +; VI-NEXT: v_add_u32_e32 v8, vcc, v0, v2 +; VI-NEXT: v_addc_u32_e32 v9, vcc, v1, v3, vcc +; VI-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] +; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], v[8:9], v[0:1] +; VI-NEXT: flat_store_dwordx2 v[4:5], v[8:9] ; VI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1] ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; VI-NEXT: flat_store_byte v[2:3], v0 +; VI-NEXT: flat_store_byte v[6:7], v0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_saddo_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-NEXT: v_mov_b32_e32 v7, s7 -; GFX9-NEXT: global_load_dwordx2 v[4:5], v[4:5], off -; GFX9-NEXT: global_load_dwordx2 v[6:7], v[6:7], 
off -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off +; GFX9-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-NEXT: v_mov_b32_e32 v7, s3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v4, v6 -; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v5, v7, vcc -; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[6:7] -; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], v[8:9], v[4:5] -; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] +; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], v[8:9], v[0:1] +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[8:9], off ; GFX9-NEXT: s_xor_b64 s[0:1], vcc, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX9-NEXT: global_store_byte v[2:3], v0, off +; GFX9-NEXT: global_store_byte v[6:7], v0, off ; GFX9-NEXT: s_endpgm %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4 @@ -428,20 +424,18 @@ define amdgpu_kernel void @v_saddo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ; SI-NEXT: s_mov_b32 s14, s10 ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s0 -; SI-NEXT: s_mov_b32 s9, s1 -; SI-NEXT: s_mov_b32 s12, s2 -; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s2, s10 -; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_mov_b32 s12, s4 +; SI-NEXT: s_mov_b32 s13, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 ; SI-NEXT: s_mov_b32 s6, s10 ; SI-NEXT: s_mov_b32 s7, s11 -; SI-NEXT: 
buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 ; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: s_mov_b32 s13, s3 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_add_i32_e32 v5, vcc, v1, v3 ; SI-NEXT: v_add_i32_e32 v4, vcc, v0, v2 @@ -461,58 +455,58 @@ define amdgpu_kernel void @v_saddo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v4, s4 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v6, s6 -; VI-NEXT: v_mov_b32_e32 v7, s7 -; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] -; VI-NEXT: flat_load_dwordx2 v[6:7], v[6:7] -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v3, s3 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v6, s2 +; VI-NEXT: v_mov_b32_e32 v7, s3 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v9, vcc, v5, v7 -; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v6 -; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v7 -; VI-NEXT: v_cmp_lt_i32_e64 s[4:5], v9, v5 -; VI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v6 -; VI-NEXT: v_cmp_lt_i32_e64 s[2:3], v8, v4 +; VI-NEXT: v_add_u32_e32 v9, vcc, v1, v3 +; VI-NEXT: v_add_u32_e32 v8, vcc, v0, v2 +; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v3 +; VI-NEXT: v_cmp_lt_i32_e64 s[4:5], v9, v1 ; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] -; VI-NEXT: flat_store_dwordx2 v[0:1], v[8:9] +; VI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 +; VI-NEXT: v_cmp_lt_i32_e64 s[2:3], v8, v0 ; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] ; VI-NEXT: s_xor_b64 
s[0:1], vcc, s[2:3] ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: flat_store_dwordx2 v[4:5], v[8:9] +; VI-NEXT: flat_store_dwordx2 v[6:7], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_saddo_v2i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-NEXT: v_mov_b32_e32 v7, s7 -; GFX9-NEXT: global_load_dwordx2 v[4:5], v[4:5], off -; GFX9-NEXT: global_load_dwordx2 v[6:7], v[6:7], off -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off +; GFX9-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-NEXT: v_mov_b32_e32 v7, s3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v9, v5, v7 -; GFX9-NEXT: v_add_u32_e32 v8, v4, v6 -; GFX9-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v7 -; GFX9-NEXT: v_cmp_lt_i32_e64 s[4:5], v9, v5 -; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cmp_lt_i32_e64 s[2:3], v8, v4 +; GFX9-NEXT: v_add_u32_e32 v9, v1, v3 +; GFX9-NEXT: v_add_u32_e32 v8, v0, v2 +; GFX9-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v3 +; GFX9-NEXT: v_cmp_lt_i32_e64 s[4:5], v9, v1 ; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 +; GFX9-NEXT: v_cmp_lt_i32_e64 s[2:3], v8, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] ; GFX9-NEXT: s_xor_b64 s[0:1], vcc, s[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: 
global_store_dwordx2 v[4:5], v[8:9], off +; GFX9-NEXT: global_store_dwordx2 v[6:7], v[0:1], off ; GFX9-NEXT: s_endpgm %a = load <2 x i32>, <2 x i32> addrspace(1)* %aptr, align 4 %b = load <2 x i32>, <2 x i32> addrspace(1)* %bptr, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index 091ed34e11121..8b27ee9e652a4 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -173,9 +173,9 @@ entry: ; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} ; CI-NOHSA-NOT: v_add -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} @@ -203,9 +203,9 @@ entry: ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: ; SI: s_mov_b32 {{s[0-9]+}}, 0x13480 -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64 ; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32 +; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64 ; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48 ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} ; 
CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir index 79af30b8d59ca..d10192d8f0980 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -36,10 +36,10 @@ body: | ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:VGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 - ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec ; CHECK: %11.sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_]] ; CHECK: %11.sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_]] ; CHECK: [[COPY2:%[0-9]+]]:vreg_512 = COPY %11 + ; CHECK: dead %10:vgpr_32 = V_ADD_CO_U32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec ; CHECK: S_BRANCH %bb.1 bb.0: liveins: $sgpr6_sgpr7 @@ -55,7 +55,7 @@ body: | %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec %8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec %9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec - %10:vgpr_32 = V_ADD_I32_e32 4, %3, implicit-def dead $vcc, implicit $exec + %10:vgpr_32 = V_ADD_CO_U32_e32 4, %3, implicit-def dead $vcc, implicit $exec undef %11.sub0:vreg_512 = COPY %4.sub0 %12:vgpr_32 = COPY %4.sub0 %11.sub1:vreg_512 = COPY %4.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir index fd435d4adbe64..b4e4356d4fb31 100644 --- 
a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir @@ -25,6 +25,10 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, 0, implicit $exec ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, 0, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec + ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec ; CHECK: undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec ; CHECK: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; CHECK: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF @@ -32,11 +36,7 @@ body: | ; CHECK: undef %11.sub1:vreg_64 = IMPLICIT_DEF ; CHECK: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]] - ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec - ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec ; CHECK: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec ; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec ; CHECK: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir 
b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir index aac40b73a41e0..6e0d016125c53 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -210,12 +210,12 @@ body: | %12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1 %13:vgpr_32 = V_MUL_LO_I32 0, %0, implicit $exec %14:vgpr_32 = V_MUL_LO_I32 %1, %10.sub1, implicit $exec - %15:vgpr_32 = V_ADD_I32_e32 0, %13, implicit-def dead $vcc, implicit $exec - %16:vgpr_32 = V_ADD_I32_e32 0, %15, implicit-def dead $vcc, implicit $exec + %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13, implicit-def dead $vcc, implicit $exec + %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15, implicit-def dead $vcc, implicit $exec %17:vgpr_32 = IMPLICIT_DEF %18:sreg_64 = S_MOV_B64 0 %19:sreg_32_xm0_xexec = IMPLICIT_DEF - %20:vgpr_32 = V_ADD_I32_e32 %19, %0, implicit-def dead $vcc, implicit $exec + %20:vgpr_32 = V_ADD_CO_U32_e32 %19, %0, implicit-def dead $vcc, implicit $exec %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32 %20, 12, %7, 0, implicit $exec %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, 0, implicit $exec %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32 %20, 48, %8, 0, implicit $exec @@ -236,21 +236,21 @@ body: | undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec %38.sub0:vreg_64 = COPY %37.sub0 %39:vreg_64 = V_LSHLREV_B64 3, %38, implicit $exec - undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, 0, implicit $exec + undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0, 0, implicit $exec %42:vgpr_32 = COPY %33 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34) undef %45.sub1:vreg_64 = IMPLICIT_DEF %45.sub0:vreg_64 = COPY %37.sub1 %46:vreg_64 = V_LSHLREV_B64 3, %45, implicit $exec - undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_I32_e64 %32, %46.sub0, 0, implicit $exec + undef %47.sub0:vreg_64, 
%48:sreg_64_xexec = V_ADD_CO_U32_e64 %32, %46.sub0, 0, implicit $exec %49:vgpr_32 = COPY %33 %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec %51:vreg_64 = IMPLICIT_DEF undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8) %52.sub1:vreg_64 = IMPLICIT_DEF %53:vreg_64 = V_LSHLREV_B64 3, %52, implicit $exec - undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, 0, implicit $exec + undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0, 0, implicit $exec %56:vgpr_32 = COPY %33 %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1, %55, 0, implicit $exec %58:vreg_64 = IMPLICIT_DEF @@ -262,14 +262,14 @@ body: | undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec %63.sub0:vreg_64 = COPY %62.sub0 %64:vreg_64 = IMPLICIT_DEF - undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, 0, implicit $exec + undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %64.sub0, 0, implicit $exec %67:vgpr_32 = COPY %61 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58) undef %70.sub1:vreg_64 = IMPLICIT_DEF %70.sub0:vreg_64 = IMPLICIT_DEF %71:vreg_64 = IMPLICIT_DEF - undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, 0, implicit $exec + undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %71.sub0, 0, implicit $exec %74:vgpr_32 = COPY %61 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir index d0eea78bd2353..7d1661746087f 100644 --- 
a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -64,7 +64,7 @@ body: | ; CHECK: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_SUB_U32_e32_]], [[DEF]].sub0, implicit $exec ; CHECK: [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_1]], [[V_MUL_LO_U32_]], implicit $exec ; CHECK: [[DEF]].sub0:vreg_64 = V_ADD_U32_e32 [[V_SUB_U32_e32_1]], [[V_ADD_U32_e32_1]], implicit $exec - ; CHECK: undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec + ; CHECK: undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec ; CHECK: undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec ; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec ; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec @@ -118,7 +118,7 @@ body: | %37:vgpr_32 = COPY %3.sub1 undef %8.sub0:vreg_64 = V_ADD_U32_e32 %36, %35, implicit $exec %8.sub1:vreg_64 = COPY %6 - undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 %3.sub0, %8.sub0, 0, implicit $exec + undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_CO_U32_e64 %3.sub0, %8.sub0, 0, implicit $exec undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sdiv.ll b/llvm/test/CodeGen/AMDGPU/sdiv.ll index f51d152fa157d..41c11f3b91538 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv.ll @@ -203,14 +203,14 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 
s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 30, v1 @@ -224,14 +224,14 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* ; TONGA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; TONGA-NEXT: s_mov_b32 s3, 0xf000 ; TONGA-NEXT: s_mov_b32 s2, -1 +; TONGA-NEXT: s_mov_b32 s10, s2 +; TONGA-NEXT: s_mov_b32 s11, s3 ; TONGA-NEXT: s_waitcnt lgkmcnt(0) +; TONGA-NEXT: s_mov_b32 s8, s6 +; TONGA-NEXT: s_mov_b32 s9, s7 +; TONGA-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; TONGA-NEXT: s_mov_b32 s0, s4 ; TONGA-NEXT: s_mov_b32 s1, s5 -; TONGA-NEXT: s_mov_b32 s4, s6 -; TONGA-NEXT: s_mov_b32 s5, s7 -; TONGA-NEXT: s_mov_b32 s6, s2 -; TONGA-NEXT: s_mov_b32 s7, s3 -; TONGA-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; TONGA-NEXT: s_waitcnt vmcnt(0) ; TONGA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; TONGA-NEXT: v_lshrrev_b32_e32 v1, 30, v1 @@ -694,14 +694,14 @@ define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: 
buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -719,14 +719,14 @@ define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> ; TONGA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; TONGA-NEXT: s_mov_b32 s3, 0xf000 ; TONGA-NEXT: s_mov_b32 s2, -1 +; TONGA-NEXT: s_mov_b32 s10, s2 +; TONGA-NEXT: s_mov_b32 s11, s3 ; TONGA-NEXT: s_waitcnt lgkmcnt(0) +; TONGA-NEXT: s_mov_b32 s8, s6 +; TONGA-NEXT: s_mov_b32 s9, s7 +; TONGA-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; TONGA-NEXT: s_mov_b32 s0, s4 ; TONGA-NEXT: s_mov_b32 s1, s5 -; TONGA-NEXT: s_mov_b32 s4, s6 -; TONGA-NEXT: s_mov_b32 s5, s7 -; TONGA-NEXT: s_mov_b32 s6, s2 -; TONGA-NEXT: s_mov_b32 s7, s3 -; TONGA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; TONGA-NEXT: s_waitcnt vmcnt(0) ; TONGA-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; TONGA-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -744,14 +744,14 @@ define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_mov_b32 s11, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s8, s6 +; GFX9-NEXT: s_mov_b32 s9, s7 +; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 -; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1073,16 +1073,16 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b32 s11, 0xf000 ; GFX9-NEXT: s_mov_b32 s10, -1 -; GFX9-NEXT: s_mov_b32 
s4, 0x4f7ffffe +; GFX9-NEXT: s_mov_b32 s6, s10 +; GFX9-NEXT: s_mov_b32 s7, s11 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GFX9-NEXT: s_mov_b32 s2, 0x4f7ffffe ; GFX9-NEXT: s_mov_b32 s8, s0 ; GFX9-NEXT: s_mov_b32 s9, s1 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s10 -; GFX9-NEXT: s_mov_b32 s3, s11 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1120,14 +1120,14 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GFX9-NEXT: v_cvt_f32_u32_e32 v14, v7 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v10, v10 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v12, v12 -; GFX9-NEXT: v_mul_f32_e32 v8, s4, v8 +; GFX9-NEXT: v_mul_f32_e32 v8, s2, v8 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v14, v14 ; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX9-NEXT: v_mul_f32_e32 v10, s4, v10 -; GFX9-NEXT: v_mul_f32_e32 v12, s4, v12 +; GFX9-NEXT: v_mul_f32_e32 v10, s2, v10 +; GFX9-NEXT: v_mul_f32_e32 v12, s2, v12 ; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v10 ; GFX9-NEXT: v_sub_u32_e32 v9, 0, v4 -; GFX9-NEXT: v_mul_f32_e32 v14, s4, v14 +; GFX9-NEXT: v_mul_f32_e32 v14, s2, v14 ; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v12 ; GFX9-NEXT: v_mul_lo_u32 v9, v9, v8 ; GFX9-NEXT: v_cvt_u32_f32_e32 v14, v14 @@ -1330,14 +1330,14 @@ define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, 
s[8:11], 0 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_ashrrev_i32_e32 v4, 31, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v5, 31, v1 @@ -1363,14 +1363,14 @@ define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> ; TONGA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; TONGA-NEXT: s_mov_b32 s3, 0xf000 ; TONGA-NEXT: s_mov_b32 s2, -1 +; TONGA-NEXT: s_mov_b32 s10, s2 +; TONGA-NEXT: s_mov_b32 s11, s3 ; TONGA-NEXT: s_waitcnt lgkmcnt(0) +; TONGA-NEXT: s_mov_b32 s8, s6 +; TONGA-NEXT: s_mov_b32 s9, s7 +; TONGA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; TONGA-NEXT: s_mov_b32 s0, s4 ; TONGA-NEXT: s_mov_b32 s1, s5 -; TONGA-NEXT: s_mov_b32 s4, s6 -; TONGA-NEXT: s_mov_b32 s5, s7 -; TONGA-NEXT: s_mov_b32 s6, s2 -; TONGA-NEXT: s_mov_b32 s7, s3 -; TONGA-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; TONGA-NEXT: s_waitcnt vmcnt(0) ; TONGA-NEXT: v_ashrrev_i32_e32 v4, 31, v0 ; TONGA-NEXT: v_ashrrev_i32_e32 v5, 31, v1 @@ -1396,14 +1396,14 @@ define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_mov_b32 s11, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s8, s6 +; GFX9-NEXT: s_mov_b32 s9, s7 +; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v0 ; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v1 @@ -1619,17 +1619,17 @@ define 
amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; GCN-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2 +; GCN-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:4 +; GCN-NEXT: buffer_load_ubyte v3, off, s[8:11], 0 offset:6 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GCN-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 offset:2 -; GCN-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4 -; GCN-NEXT: buffer_load_ubyte v3, off, s[4:7], 0 offset:6 ; GCN-NEXT: s_waitcnt vmcnt(2) ; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1 @@ -1660,17 +1660,17 @@ define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* ; TONGA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; TONGA-NEXT: s_mov_b32 s3, 0xf000 ; TONGA-NEXT: s_mov_b32 s2, -1 +; TONGA-NEXT: s_mov_b32 s10, s2 +; TONGA-NEXT: s_mov_b32 s11, s3 ; TONGA-NEXT: s_waitcnt lgkmcnt(0) +; TONGA-NEXT: s_mov_b32 s8, s6 +; TONGA-NEXT: s_mov_b32 s9, s7 +; TONGA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; TONGA-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2 +; TONGA-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:4 +; TONGA-NEXT: buffer_load_ubyte v3, off, s[8:11], 0 offset:6 ; TONGA-NEXT: s_mov_b32 s0, s4 ; TONGA-NEXT: s_mov_b32 s1, s5 -; TONGA-NEXT: s_mov_b32 s4, s6 -; TONGA-NEXT: s_mov_b32 s5, s7 -; TONGA-NEXT: s_mov_b32 s6, s2 -; TONGA-NEXT: s_mov_b32 s7, s3 -; TONGA-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; TONGA-NEXT: buffer_load_ubyte v1, off, 
s[4:7], 0 offset:2 -; TONGA-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4 -; TONGA-NEXT: buffer_load_ubyte v3, off, s[4:7], 0 offset:6 ; TONGA-NEXT: s_waitcnt vmcnt(2) ; TONGA-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; TONGA-NEXT: v_or_b32_e32 v0, v0, v1 @@ -1701,17 +1701,17 @@ define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_mov_b32 s11, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s8, s6 +; GFX9-NEXT: s_mov_b32 s9, s7 +; GFX9-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; GFX9-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2 +; GFX9-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:4 +; GFX9-NEXT: buffer_load_ubyte v3, off, s[8:11], 0 offset:6 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 -; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GFX9-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 offset:2 -; GFX9-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4 -; GFX9-NEXT: buffer_load_ubyte v3, off, s[4:7], 0 offset:6 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 @@ -1802,17 +1802,17 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; GCN-NEXT: buffer_load_sbyte v1, off, s[8:11], 0 offset:2 +; GCN-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:4 +; GCN-NEXT: buffer_load_sbyte v3, off, s[8:11], 0 offset:6 
; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GCN-NEXT: buffer_load_sbyte v1, off, s[4:7], 0 offset:2 -; GCN-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4 -; GCN-NEXT: buffer_load_sbyte v3, off, s[4:7], 0 offset:6 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v3 ; GCN-NEXT: v_or_b32_e32 v2, v2, v4 @@ -1840,17 +1840,17 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; TONGA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; TONGA-NEXT: s_mov_b32 s3, 0xf000 ; TONGA-NEXT: s_mov_b32 s2, -1 +; TONGA-NEXT: s_mov_b32 s10, s2 +; TONGA-NEXT: s_mov_b32 s11, s3 ; TONGA-NEXT: s_waitcnt lgkmcnt(0) +; TONGA-NEXT: s_mov_b32 s8, s6 +; TONGA-NEXT: s_mov_b32 s9, s7 +; TONGA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; TONGA-NEXT: buffer_load_sbyte v1, off, s[8:11], 0 offset:2 +; TONGA-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:4 +; TONGA-NEXT: buffer_load_sbyte v3, off, s[8:11], 0 offset:6 ; TONGA-NEXT: s_mov_b32 s0, s4 ; TONGA-NEXT: s_mov_b32 s1, s5 -; TONGA-NEXT: s_mov_b32 s4, s6 -; TONGA-NEXT: s_mov_b32 s5, s7 -; TONGA-NEXT: s_mov_b32 s6, s2 -; TONGA-NEXT: s_mov_b32 s7, s3 -; TONGA-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; TONGA-NEXT: buffer_load_sbyte v1, off, s[4:7], 0 offset:2 -; TONGA-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:4 -; TONGA-NEXT: buffer_load_sbyte v3, off, s[4:7], 0 offset:6 ; TONGA-NEXT: s_waitcnt vmcnt(0) ; TONGA-NEXT: v_lshlrev_b32_e32 v4, 16, v3 ; TONGA-NEXT: v_or_b32_e32 v2, v2, v4 @@ -2214,16 +2214,14 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 ; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GCN-NEXT: s_mov_b32 s0, 0x1389c755 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 
-; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 +; GCN-NEXT: s_mov_b32 s4, 0x1389c755 +; GCN-NEXT: s_mov_b32 s0, s6 +; GCN-NEXT: s_mov_b32 s1, s7 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mul_hi_i32 v0, v0, s0 -; GCN-NEXT: v_mul_hi_i32 v1, v1, s0 -; GCN-NEXT: v_mul_hi_i32 v2, v2, s0 -; GCN-NEXT: v_mul_hi_i32 v3, v3, s0 +; GCN-NEXT: v_mul_hi_i32 v0, v0, s4 +; GCN-NEXT: v_mul_hi_i32 v1, v1, s4 +; GCN-NEXT: v_mul_hi_i32 v2, v2, s4 +; GCN-NEXT: v_mul_hi_i32 v3, v3, s4 ; GCN-NEXT: v_lshrrev_b32_e32 v4, 31, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 12, v0 ; GCN-NEXT: v_lshrrev_b32_e32 v5, 31, v1 @@ -2236,7 +2234,7 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GCN-NEXT: s_endpgm ; ; TONGA-LABEL: scalarize_mulhs_4xi32: @@ -2248,16 +2246,14 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; TONGA-NEXT: s_mov_b32 s0, s4 ; TONGA-NEXT: s_mov_b32 s1, s5 ; TONGA-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; TONGA-NEXT: s_mov_b32 s0, 0x1389c755 -; TONGA-NEXT: s_mov_b32 s4, s6 -; TONGA-NEXT: s_mov_b32 s5, s7 -; TONGA-NEXT: s_mov_b32 s6, s2 -; TONGA-NEXT: s_mov_b32 s7, s3 +; TONGA-NEXT: s_mov_b32 s4, 0x1389c755 +; TONGA-NEXT: s_mov_b32 s0, s6 +; TONGA-NEXT: s_mov_b32 s1, s7 ; TONGA-NEXT: s_waitcnt vmcnt(0) -; TONGA-NEXT: v_mul_hi_i32 v0, v0, s0 -; TONGA-NEXT: v_mul_hi_i32 v1, v1, s0 -; TONGA-NEXT: v_mul_hi_i32 v2, v2, s0 -; TONGA-NEXT: v_mul_hi_i32 v3, v3, s0 +; TONGA-NEXT: v_mul_hi_i32 v0, v0, s4 +; TONGA-NEXT: v_mul_hi_i32 v1, v1, s4 +; TONGA-NEXT: v_mul_hi_i32 v2, v2, s4 +; TONGA-NEXT: v_mul_hi_i32 v3, v3, s4 ; TONGA-NEXT: v_lshrrev_b32_e32 v4, 31, v0 ; TONGA-NEXT: v_ashrrev_i32_e32 v0, 12, v0 ; TONGA-NEXT: v_lshrrev_b32_e32 v5, 31, v1 @@ -2270,7 
+2266,7 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v1, v5 ; TONGA-NEXT: v_add_u32_e32 v2, vcc, v2, v6 ; TONGA-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; TONGA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; TONGA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; TONGA-NEXT: s_endpgm ; ; GFX9-LABEL: scalarize_mulhs_4xi32: @@ -2282,16 +2278,14 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_mov_b32 s0, 0x1389c755 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 +; GFX9-NEXT: s_mov_b32 s4, 0x1389c755 +; GFX9-NEXT: s_mov_b32 s0, s6 +; GFX9-NEXT: s_mov_b32 s1, s7 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mul_hi_i32 v0, v0, s0 -; GFX9-NEXT: v_mul_hi_i32 v1, v1, s0 -; GFX9-NEXT: v_mul_hi_i32 v2, v2, s0 -; GFX9-NEXT: v_mul_hi_i32 v3, v3, s0 +; GFX9-NEXT: v_mul_hi_i32 v0, v0, s4 +; GFX9-NEXT: v_mul_hi_i32 v1, v1, s4 +; GFX9-NEXT: v_mul_hi_i32 v2, v2, s4 +; GFX9-NEXT: v_mul_hi_i32 v3, v3, s4 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v0 ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 12, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 31, v1 @@ -2304,7 +2298,7 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; GFX9-NEXT: v_add_u32_e32 v1, v1, v5 ; GFX9-NEXT: v_add_u32_e32 v2, v2, v6 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v7 -; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; ; EG-LABEL: scalarize_mulhs_4xi32: diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index e0a469e11ed59..07df1108df56c 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -1867,56 +1867,56 @@ define i64 
@v_test_sdiv_pow2_k_den_i64(i64 %x) { define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %x) { ; GCN-LABEL: s_test_sdiv24_k_num_i64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s6 -; GCN-NEXT: s_mov_b32 s7, 0x41c00000 -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i32 s4, s6, 30 +; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s3, 0x41c00000 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_ashr_i32 s0, s2, 30 ; GCN-NEXT: v_rcp_iflag_f32_e32 v1, v0 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_or_b32 s6, s4, 1 -; GCN-NEXT: v_mul_f32_e32 v1, s7, v1 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_or_b32 s2, s0, 1 +; GCN-NEXT: v_mul_f32_e32 v1, s3, v1 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mad_f32 v2, -v1, v0, s7 +; GCN-NEXT: v_mad_f32 v2, -v1, v0, s3 ; GCN-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, |v0| -; GCN-NEXT: s_cmp_lg_u32 s4, 0 -; GCN-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s4, v1 +; GCN-NEXT: v_cmp_ge_f32_e64 s[0:1], |v2|, |v0| +; GCN-NEXT: s_cmp_lg_u32 s0, 0 +; GCN-NEXT: s_cselect_b32 s0, s2, 0 +; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v1 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_sdiv24_k_num_i64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 +; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; 
GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s6 -; GCN-IR-NEXT: s_mov_b32 s7, 0x41c00000 -; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i32 s4, s6, 30 +; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s2 +; GCN-IR-NEXT: s_mov_b32 s3, 0x41c00000 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: s_ashr_i32 s0, s2, 30 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v1, v0 -; GCN-IR-NEXT: s_mov_b32 s1, s5 -; GCN-IR-NEXT: s_or_b32 s6, s4, 1 -; GCN-IR-NEXT: v_mul_f32_e32 v1, s7, v1 +; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: s_or_b32 s2, s0, 1 +; GCN-IR-NEXT: v_mul_f32_e32 v1, s3, v1 ; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-IR-NEXT: v_mad_f32 v2, -v1, v0, s7 +; GCN-IR-NEXT: v_mad_f32 v2, -v1, v0, s3 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, |v0| -; GCN-IR-NEXT: s_cmp_lg_u32 s4, 0 -; GCN-IR-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s4, v1 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[0:1], |v2|, |v0| +; GCN-IR-NEXT: s_cmp_lg_u32 s0, 0 +; GCN-IR-NEXT: s_cselect_b32 s0, s2, 0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %x.shr = ashr i64 %x, 40 %result = sdiv i64 24, %x.shr diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir index 192bce362c4f9..2a202294e1995 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir @@ -6,12 +6,12 @@ # GCN: [[SMOV:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 123 # CI: [[SHIFT:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit $exec -# CI: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_e32 [[SMOV]], killed [[SHIFT]], implicit-def $vcc, implicit $exec 
+# CI: %{{[0-9]+}}:vgpr_32 = V_ADD_CO_U32_e32 [[SMOV]], killed [[SHIFT]], implicit-def $vcc, implicit $exec # VI: [[VMOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[SMOV]], implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_sdwa 0, [[VMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def $vcc, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_CO_U32_sdwa 0, [[VMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def $vcc, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_sdwa 0, [[SMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def $vcc, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_CO_U32_sdwa 0, [[SMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def $vcc, implicit $exec --- name: add_shr_i32 @@ -40,7 +40,7 @@ body: | %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %12 = S_MOV_B32 123 %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec - %11 = V_ADD_I32_e32 %12, killed %10, implicit-def $vcc, implicit $exec + %11 = V_ADD_CO_U32_e32 %12, killed %10, implicit-def $vcc, implicit $exec FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir index c181f51e747fd..2546775582c9e 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir @@ -3,11 +3,11 @@ # test for 3 consecutive _sdwa's # GFX9-LABEL: name: test1_add_co_sdwa -# GFX9: = nsw V_ADD_I32_sdwa +# GFX9: = nsw V_ADD_CO_U32_sdwa # GFX9-NEXT: = nuw V_ADDC_U32_e32 -# GFX9: V_ADD_I32_sdwa +# GFX9: V_ADD_CO_U32_sdwa # GFX9-NEXT: V_ADDC_U32_e32 -# GFX9: V_ADD_I32_sdwa +# GFX9: V_ADD_CO_U32_sdwa # GFX9-NEXT: V_ADDC_U32_e32 --- name: test1_add_co_sdwa @@ -26,19 +26,19 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + 
%63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec + %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %173:vgpr_32, %175:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %171, 0, implicit $exec + %173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -47,9 +47,9 @@ body: | # test for VCC interference on sdwa, should generate 1 xform only # GFX9-LABEL: name: test2_add_co_sdwa -# GFX9: V_ADD_I32_sdwa +# GFX9: V_ADD_CO_U32_sdwa # GFX9: V_ADDC_U32_e32 -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9-NOT: V_ADDC_U32_e32 --- name: test2_add_co_sdwa @@ -68,10 +68,10 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %161:vgpr_32 = V_AND_B32_e32 %22, %0, 
implicit $exec - %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec + %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 @@ -80,7 +80,7 @@ body: | GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec + %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -89,9 +89,9 @@ body: | # test for CarryOut used, should reject # GFX9-LABEL: name: test3_add_co_sdwa -# GFX9: V_ADD_I32_e64 +# GFX9: V_ADD_CO_U32_e64 # GFX9: V_ADDC_U32_e64 -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9-NOT: V_ADDC_U32_e32 --- name: test3_add_co_sdwa @@ -110,7 +110,7 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -119,9 +119,9 @@ body: | # test for CarryIn used more than once, should reject # GFX9-LABEL: name: test4_add_co_sdwa -# GFX9: V_ADD_I32_e64 +# GFX9: V_ADD_CO_U32_e64 # GFX9: 
V_ADDC_U32_e64 -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9-NOT: V_ADDC_U32_e32 --- name: test4_add_co_sdwa @@ -140,7 +140,7 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -150,7 +150,7 @@ body: | # test for simple example, should generate sdwa # GFX9-LABEL: name: test5_add_co_sdwa -# GFX9: V_ADD_I32_sdwa +# GFX9: V_ADD_CO_U32_sdwa # GFX9: V_ADDC_U32_e32 --- name: test5_add_co_sdwa @@ -169,7 +169,7 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -177,10 +177,10 @@ body: | ... 
-# test for V_ADD_I32_e64 only, should reject +# test for V_ADD_CO_U32_e64 only, should reject # GFX9-LABEL: name: test6_add_co_sdwa -# GFX9: V_ADD_I32_e64 -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9: V_ADD_CO_U32_e64 +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9-NOT: V_ADDC_U32_e32 --- name: test6_add_co_sdwa @@ -199,7 +199,7 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -209,7 +209,7 @@ body: | # test for V_ADDC_U32_e64 only, should reject # GFX9-LABEL: name: test7_add_co_sdwa # GFX9: V_ADDC_U32_e64 -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9-NOT: V_ADDC_U32_e32 --- name: test7_add_co_sdwa @@ -239,7 +239,7 @@ body: | # test for $vcc defined between two adds, should not generate # GFX9-LABEL: name: test8_add_co_sdwa -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9: V_ADDC_U32_e64 --- name: test8_add_co_sdwa @@ -258,7 +258,7 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec $vcc = COPY %30 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc @@ -270,7 +270,7 @@ body: | # test for non dead $vcc, should not generate # GFX9-LABEL: name: test9_add_co_sdwa -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9: V_ADDC_U32_e64 --- name: test9_add_co_sdwa @@ -290,7 +290,7 @@ body: | %30:vreg_64 = COPY $sgpr0_sgpr1 $vcc = COPY %30 
%23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 @@ -300,7 +300,7 @@ body: | # test for def $vcc_lo, should not generate # GFX9-LABEL: name: test10_add_co_sdwa -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9: V_ADDC_U32_e64 --- name: test10_add_co_sdwa @@ -320,7 +320,7 @@ body: | %30:vreg_64 = COPY $sgpr0_sgpr1 $vcc_lo = COPY %30.sub0 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %31:vgpr_32 = COPY $vcc_lo %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec @@ -331,7 +331,7 @@ body: | # test for read $vcc_hi, should not generate # GFX9-LABEL: name: test11_add_co_sdwa -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9: V_ADDC_U32_e64 --- name: test11_add_co_sdwa @@ -351,7 +351,7 @@ body: | %30:vreg_64 = COPY $sgpr0_sgpr1 $vcc_hi = COPY %30.sub0 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %31:vgpr_32 = COPY $vcc_hi %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec @@ -362,7 +362,7 @@ body: | # test for $vcc defined and used between adds, should not generate # GFX9-LABEL: name: test12_add_co_sdwa -# GFX9-NOT: V_ADD_I32_sdwa +# GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9: 
V_ADDC_U32_e64 --- name: test12_add_co_sdwa @@ -381,7 +381,7 @@ body: | %22:sreg_32_xm0 = S_MOV_B32 255 %30:vreg_64 = COPY $sgpr0_sgpr1 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec $vcc = COPY %30 %31:vreg_64 = COPY killed $vcc %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll index ae836e447cc56..7850d92bf473c 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -73,7 +73,7 @@ entry: ; GCN-LABEL: {{^}}mul_v2i16: ; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}} ; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}} -; NOSDWA: v_mul_u32_u24_e32 v[[DST_MUL:[0-9]+]], v[[DST1]], v[[DST0]] +; NOSDWA: v_mul_u32_u24_e32 v[[DST_MUL:[0-9]+]], v[[DST0]], v[[DST1]] ; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MUL]] ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]] ; NOSDWA-NOT: v_mul_u32_u24_sdwa diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir index ed12cdd9d25b5..47469c5f9109f 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir @@ -224,7 +224,7 @@ body: | %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) %60 = V_BFE_U32 %17, 8, 8, implicit $exec %61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec - %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec + %70 = V_ADD_CO_U32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %67 = REG_SEQUENCE %70, %subreg.sub0, killed %65, %subreg.sub1 @@ -237,7 +237,7 
@@ body: | %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) %73 = V_BFE_U32 %40, 8, 8, implicit $exec %74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec - %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec + %83 = V_ADD_CO_U32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %80 = REG_SEQUENCE %83, %subreg.sub0, killed %78, %subreg.sub1 FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) @@ -387,7 +387,7 @@ body: | %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) %60 = V_BFE_U32 %17, 8, 8, implicit $exec %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec - %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec + %70 = V_ADD_CO_U32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %67 = REG_SEQUENCE %70, %subreg.sub0, killed %65, %subreg.sub1 @@ -400,7 +400,7 @@ body: | %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) %73 = V_BFE_U32 %40, 8, 8, implicit $exec %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec - %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec + %83 = V_ADD_CO_U32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %80 = REG_SEQUENCE %83, %subreg.sub0, killed %78, %subreg.sub1 FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll new file mode 100644 index 0000000000000..746a277ea59a3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll @@ 
-0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone +declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable +define amdgpu_kernel void @select_constant_cttz(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind { +; GCN-LABEL: select_constant_cttz: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_load_dword s8, s[2:3], 0x0 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, 1, s8 +; GCN-NEXT: s_ff1_i32_b32 s0, s0 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], s8, 0 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[2:3] +; GCN-NEXT: v_ffbh_i32_e32 v1, v0 +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, 31, v1 +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[0:1] +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %v = load i32, i32 addrspace(1)* %arrayidx, align 4 + %sr = lshr i32 1, %v + %cmp = icmp ne i32 %v, 0 + %cttz = call i32 @llvm.cttz.i32(i32 %sr, i1 true), !range !0 + %sel = select i1 %cmp, i32 -1, i32 %cttz + %ffbh = call i32 @llvm.amdgcn.sffbh.i32(i32 %sel) + %sub = sub i32 31, %ffbh + %cmp2 = icmp eq i32 %sel, 0 + %or = or i1 %cmp, %cmp2 + %sel2 = select i1 %or, i32 -1, i32 %sub + store i32 %sel2, i32 addrspace(1)* %out + ret void +} + +!0 = !{i32 0, i32 33} diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll index 5f4c49d970970..80ebcd54bfb20 100644 --- a/llvm/test/CodeGen/AMDGPU/select.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll @@ -52,25 +52,25 @@ define amdgpu_kernel void 
@select_f16( ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s18, s2 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_mov_b32 s16, s8 -; VI-NEXT: s_mov_b32 s17, s9 +; VI-NEXT: s_mov_b32 s16, s6 +; VI-NEXT: s_mov_b32 s17, s7 +; VI-NEXT: s_mov_b32 s19, s3 +; VI-NEXT: s_mov_b32 s20, s8 +; VI-NEXT: s_mov_b32 s21, s9 ; VI-NEXT: s_mov_b32 s8, s10 ; VI-NEXT: s_mov_b32 s9, s11 -; VI-NEXT: s_mov_b32 s19, s3 +; VI-NEXT: s_mov_b32 s22, s2 +; VI-NEXT: s_mov_b32 s23, s3 ; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 ; VI-NEXT: s_mov_b32 s14, s2 ; VI-NEXT: s_mov_b32 s15, s3 -; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 +; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[20:23], 0 ; VI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 ; VI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 +; VI-NEXT: s_mov_b32 s0, s4 +; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -137,21 +137,21 @@ define amdgpu_kernel void @select_f16_imm_a( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_ushort v1, off, 
s[12:15], 0 +; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 ; VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_cmp_lt_f16_e32 vcc, 0.5, v0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -216,21 +216,21 @@ define amdgpu_kernel void @select_f16_imm_b( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[12:15], 0 +; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 ; VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_cmp_gt_f16_e32 vcc, 0.5, v0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -295,26 +295,26 @@ define amdgpu_kernel void @select_f16_imm_c( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 -; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[12:15], 0 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: 
s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_ushort v3, off, s[4:7], 0 -; VI-NEXT: v_mov_b32_e32 v2, 0x3800 +; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 +; VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 +; VI-NEXT: v_mov_b32_e32 v3, 0x3800 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 ; VI-NEXT: s_endpgm half addrspace(1)* %r, @@ -375,26 +375,26 @@ define amdgpu_kernel void @select_f16_imm_d( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 -; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[12:15], 0 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_ushort v3, off, s[4:7], 0 -; VI-NEXT: v_mov_b32_e32 v2, 0x3800 +; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 +; VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 +; VI-NEXT: v_mov_b32_e32 v3, 0x3800 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt 
vmcnt(1) ; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 ; VI-NEXT: s_endpgm half addrspace(1)* %r, @@ -474,25 +474,25 @@ define amdgpu_kernel void @select_v2f16( ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s18, s2 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_mov_b32 s16, s8 -; VI-NEXT: s_mov_b32 s17, s9 +; VI-NEXT: s_mov_b32 s16, s6 +; VI-NEXT: s_mov_b32 s17, s7 +; VI-NEXT: s_mov_b32 s19, s3 +; VI-NEXT: s_mov_b32 s20, s8 +; VI-NEXT: s_mov_b32 s21, s9 ; VI-NEXT: s_mov_b32 s8, s10 ; VI-NEXT: s_mov_b32 s9, s11 -; VI-NEXT: s_mov_b32 s19, s3 +; VI-NEXT: s_mov_b32 s22, s2 +; VI-NEXT: s_mov_b32 s23, s3 ; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 ; VI-NEXT: s_mov_b32 s14, s2 ; VI-NEXT: s_mov_b32 s15, s3 -; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0 -; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 +; VI-NEXT: buffer_load_dword v0, off, s[16:19], 0 +; VI-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; VI-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; VI-NEXT: buffer_load_dword v3, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s0, s4 +; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_waitcnt vmcnt(3) ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; VI-NEXT: s_waitcnt vmcnt(2) @@ -534,15 +534,15 @@ define amdgpu_kernel void @select_v2f16_imm_a( ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: s_mov_b32 s13, s3 ; SI-NEXT: s_mov_b32 s16, s4 ; SI-NEXT: s_mov_b32 s17, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s6, s10 -; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: s_mov_b32 s18, s10 ; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 
s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; SI-NEXT: buffer_load_dword v2, off, s[4:7], 0 @@ -580,22 +580,22 @@ define amdgpu_kernel void @select_v2f16_imm_a( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_dword v1, off, s[12:15], 0 +; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; VI-NEXT: buffer_load_dword v2, off, s[4:7], 0 -; VI-NEXT: s_movk_i32 s0, 0x3900 +; VI-NEXT: s_movk_i32 s2, 0x3900 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; VI-NEXT: v_cmp_lt_f16_e32 vcc, 0.5, v0 @@ -603,7 +603,7 @@ define amdgpu_kernel void @select_v2f16_imm_a( ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; VI-NEXT: v_cmp_lt_f16_e32 vcc, s0, v3 +; VI-NEXT: v_cmp_lt_f16_e32 vcc, s2, v3 ; VI-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -633,15 +633,15 @@ define amdgpu_kernel void @select_v2f16_imm_b( ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: 
s_mov_b32 s13, s3 ; SI-NEXT: s_mov_b32 s16, s4 ; SI-NEXT: s_mov_b32 s17, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s6, s10 -; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: s_mov_b32 s18, s10 ; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; SI-NEXT: buffer_load_dword v2, off, s[4:7], 0 @@ -679,22 +679,22 @@ define amdgpu_kernel void @select_v2f16_imm_b( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_dword v1, off, s[12:15], 0 +; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; VI-NEXT: buffer_load_dword v2, off, s[4:7], 0 -; VI-NEXT: s_movk_i32 s0, 0x3900 +; VI-NEXT: s_movk_i32 s2, 0x3900 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; VI-NEXT: v_cmp_gt_f16_e32 vcc, 0.5, v0 @@ -702,7 +702,7 @@ define amdgpu_kernel void @select_v2f16_imm_b( ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; VI-NEXT: v_cmp_gt_f16_e32 vcc, s0, v3 +; VI-NEXT: v_cmp_gt_f16_e32 vcc, s2, v3 ; VI-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc ; VI-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -731,38 +731,39 @@ define amdgpu_kernel void @select_v2f16_imm_c( ; SI-NEXT: s_mov_b32 s14, s10 ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s16, s4 -; SI-NEXT: s_mov_b32 s17, s5 ; SI-NEXT: s_mov_b32 s12, s2 ; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s18, s10 -; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 s16, s4 +; SI-NEXT: s_mov_b32 s17, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_mov_b32 s19, s11 ; SI-NEXT: s_mov_b32 s6, s10 ; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 -; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0 -; SI-NEXT: buffer_load_dword v3, off, s[16:19], 0 -; SI-NEXT: v_mov_b32_e32 v2, 0x3f200000 +; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0 +; SI-NEXT: buffer_load_dword v2, off, s[4:7], 0 +; SI-NEXT: v_mov_b32_e32 v3, 0x3f200000 ; SI-NEXT: s_mov_b32 s8, s0 ; SI-NEXT: s_mov_b32 s9, s1 ; SI-NEXT: s_waitcnt vmcnt(2) ; SI-NEXT: v_cvt_f32_f16_e32 v4, v0 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v5, v5 ; SI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v5 -; SI-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc -; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v4, v3 +; SI-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc +; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v4, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: v_cndmask_b32_e32 v1, 0.5, v1, vcc +; SI-NEXT: v_cndmask_b32_e32 v1, 0.5, v2, vcc ; 
SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; SI-NEXT: v_or_b32_e32 v0, v1, v0 @@ -777,32 +778,33 @@ define amdgpu_kernel void @select_v2f16_imm_c( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_dword v1, off, s[4:7], 0 -; VI-NEXT: buffer_load_dword v4, off, s[12:15], 0 -; VI-NEXT: v_mov_b32_e32 v2, 0x3800 -; VI-NEXT: v_mov_b32_e32 v3, 0x3900 +; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 +; VI-NEXT: buffer_load_dword v2, off, s[4:7], 0 +; VI-NEXT: v_mov_b32_e32 v3, 0x3800 +; VI-NEXT: v_mov_b32_e32 v4, 0x3900 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v4 -; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v6, v5 -; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; VI-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 @@ -830,41 +832,41 @@ define amdgpu_kernel void @select_v2f16_imm_d( ; SI-NEXT: s_mov_b32 s14, s10 ; SI-NEXT: s_mov_b32 s15, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s16, s4 -; SI-NEXT: s_mov_b32 s17, s5 ; SI-NEXT: s_mov_b32 s12, s2 ; SI-NEXT: s_mov_b32 s13, s3 -; SI-NEXT: s_mov_b32 s18, s10 -; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 s16, s4 +; SI-NEXT: s_mov_b32 s17, s5 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_mov_b32 s19, s11 ; SI-NEXT: s_mov_b32 s6, s10 ; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 -; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0 -; SI-NEXT: buffer_load_dword v3, off, s[16:19], 0 -; SI-NEXT: v_mov_b32_e32 v2, 0x3f200000 +; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0 +; SI-NEXT: buffer_load_dword v2, off, s[4:7], 0 +; SI-NEXT: v_mov_b32_e32 v3, 0x3f200000 ; SI-NEXT: s_mov_b32 s8, s0 ; SI-NEXT: s_mov_b32 s9, s1 ; SI-NEXT: s_waitcnt vmcnt(2) ; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v4, v4 ; SI-NEXT: v_cvt_f32_f16_e32 v5, v5 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5 -; SI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc -; SI-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 -; SI-NEXT: v_cndmask_b32_e32 v0, 0.5, v1, vcc -; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; SI-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-NEXT: v_cndmask_b32_e32 v0, 0.5, v2, vcc +; SI-NEXT: 
v_cvt_f16_f32_e32 v3, v3 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v3 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; SI-NEXT: s_endpgm @@ -877,32 +879,33 @@ define amdgpu_kernel void @select_v2f16_imm_d( ; VI-NEXT: s_mov_b32 s14, s10 ; VI-NEXT: s_mov_b32 s15, s11 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s8, s0 -; VI-NEXT: s_mov_b32 s9, s1 -; VI-NEXT: s_mov_b32 s0, s2 -; VI-NEXT: s_mov_b32 s1, s3 -; VI-NEXT: s_mov_b32 s12, s4 -; VI-NEXT: s_mov_b32 s13, s5 -; VI-NEXT: s_mov_b32 s2, s10 -; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 ; VI-NEXT: s_mov_b32 s6, s10 ; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: buffer_load_dword v1, off, s[4:7], 0 -; VI-NEXT: buffer_load_dword v4, off, s[12:15], 0 -; VI-NEXT: v_mov_b32_e32 v2, 0x3800 -; VI-NEXT: v_mov_b32_e32 v3, 0x3900 +; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0 +; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 +; VI-NEXT: buffer_load_dword v2, off, s[4:7], 0 +; VI-NEXT: v_mov_b32_e32 v3, 0x3800 +; VI-NEXT: v_mov_b32_e32 v4, 0x3900 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_cmp_lt_f16_e32 vcc, v0, v4 -; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; VI-NEXT: v_cmp_lt_f16_e32 vcc, v6, v5 -; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, 
vcc +; VI-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll index 596225fac66aa..f423672b8da5c 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -11,32 +11,28 @@ define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { ; SI-LABEL: sgpr_if_else_salu_br: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; SI-NEXT: s_load_dword s2, s[0:1], 0xf -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_cmp_lg_u32 s4, 0 -; SI-NEXT: s_cbranch_scc0 BB0_2 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xb +; SI-NEXT: s_load_dword s0, s[0:1], 0xf +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_cmp_lg_u32 s8, 0 +; SI-NEXT: s_cbranch_scc0 BB0_2 ; SI-NEXT:; %bb.1: ; %else -; SI-NEXT: s_add_i32 s2, s7, s2 -; SI-NEXT: s_mov_b64 s[8:9], 0 -; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; SI-NEXT: s_cbranch_vccz BB0_3 -; SI-NEXT: s_branch BB0_4 +; SI-NEXT: s_add_i32 s0, s11, s0 +; SI-NEXT: s_cbranch_execz BB0_3 +; SI-NEXT: s_branch BB0_4 ; SI-NEXT:BB0_2: -; SI-NEXT: s_mov_b64 s[8:9], -1 -; SI-NEXT: ; implicit-def: $sgpr2 -; SI-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; SI-NEXT: s_cbranch_vccnz BB0_4 +; SI-NEXT: ; implicit-def: $sgpr0 ; SI-NEXT:BB0_3: ; %if -; SI-NEXT: s_sub_i32 s2, s5, s6 +; SI-NEXT: s_sub_i32 s0, s9, s10 ; SI-NEXT:BB0_4: ; %endif -; SI-NEXT: s_add_i32 s4, s2, s4 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_add_i32 s0, s0, s8 +; SI-NEXT: 
s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm + entry: %0 = icmp eq i32 %a, 0 br i1 %0, label %if, label %else @@ -59,37 +55,33 @@ endif: define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, [8 x i32], i32 %a, [8 x i32], i32 %b, [8 x i32], i32 %c, [8 x i32], i32 %d, [8 x i32], i32 %e) { ; SI-LABEL: sgpr_if_else_salu_br_opt: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dword s2, s[0:1], 0x13 -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_cmp_lg_u32 s2, 0 -; SI-NEXT: s_cbranch_scc0 BB1_2 -; SI-NEXT: ; %bb.1: ; %else -; SI-NEXT: s_load_dword s3, s[0:1], 0x2e -; SI-NEXT: s_load_dword s6, s[0:1], 0x37 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_add_i32 s3, s3, s6 -; SI-NEXT: s_mov_b64 s[6:7], 0 -; SI-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; SI-NEXT: s_cbranch_vccz BB1_3 -; SI-NEXT: s_branch BB1_4 -; SI-NEXT: BB1_2: -; SI-NEXT: s_mov_b64 s[6:7], -1 -; SI-NEXT: ; implicit-def: $sgpr3 -; SI-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; SI-NEXT: s_cbranch_vccnz BB1_4 -; SI-NEXT: BB1_3: ; %if -; SI-NEXT: s_load_dword s3, s[0:1], 0x1c -; SI-NEXT: s_load_dword s0, s[0:1], 0x25 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_add_i32 s3, s3, s0 -; SI-NEXT: BB1_4: ; %endif -; SI-NEXT: s_add_i32 s0, s3, s2 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_load_dword s2, s[0:1], 0x13 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_cmp_lg_u32 s2, 0 +; SI-NEXT: s_cbranch_scc0 BB1_2 +; SI-NEXT:; %bb.1: ; %else +; SI-NEXT: s_load_dword s3, s[0:1], 0x2e +; SI-NEXT: s_load_dword s6, s[0:1], 0x37 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s3, s3, s6 +; SI-NEXT: s_cbranch_execz BB1_3 +; SI-NEXT: s_branch BB1_4 +; SI-NEXT:BB1_2: +; SI-NEXT: 
; implicit-def: $sgpr3 +; SI-NEXT:BB1_3: ; %if +; SI-NEXT: s_load_dword s3, s[0:1], 0x1c +; SI-NEXT: s_load_dword s0, s[0:1], 0x25 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s3, s3, s0 +; SI-NEXT:BB1_4: ; %endif +; SI-NEXT: s_add_i32 s0, s3, s2 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm + entry: %cmp0 = icmp eq i32 %a, 0 br i1 %cmp0, label %if, label %else diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll index 59aebaeed56e9..f2077aa2a1ad0 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll @@ -446,68 +446,68 @@ define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) { ; GCN-LABEL: s_shl_v2i128ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GCN-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x8 -; GCN-NEXT: v_mov_b32_e32 v10, 16 -; GCN-NEXT: v_mov_b32_e32 v8, 0 -; GCN-NEXT: v_mov_b32_e32 v11, 0 -; GCN-NEXT: v_mov_b32_e32 v9, 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s6, 64, s16 -; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 -; GCN-NEXT: s_sub_i32 s4, s16, 64 -; GCN-NEXT: s_lshr_b64 s[6:7], s[8:9], s6 -; GCN-NEXT: s_lshl_b64 s[24:25], s[10:11], s16 -; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[0:1], s[16:17], s[18:19] -; GCN-NEXT: s_lshl_b64 s[4:5], s[8:9], s4 -; GCN-NEXT: s_or_b64 s[6:7], s[24:25], s[6:7] -; GCN-NEXT: v_mov_b32_e32 v0, s5 -; GCN-NEXT: v_mov_b32_e32 v1, s7 -; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s11 -; GCN-NEXT: v_cndmask_b32_e64 v3, v0, v1, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s6 -; GCN-NEXT: v_cndmask_b32_e32 v0, 
v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s10 -; GCN-NEXT: s_sub_i32 s6, 64, s20 -; GCN-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] -; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[20:21], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[22:23], 0 -; GCN-NEXT: s_sub_i32 s4, s20, 64 -; GCN-NEXT: s_lshr_b64 s[6:7], s[12:13], s6 -; GCN-NEXT: s_lshl_b64 s[10:11], s[14:15], s20 -; GCN-NEXT: s_lshl_b64 s[4:5], s[12:13], s4 -; GCN-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] -; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[2:3], s[20:21], s[22:23] -; GCN-NEXT: v_mov_b32_e32 v0, s5 -; GCN-NEXT: v_mov_b32_e32 v1, s7 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v1, s15 -; GCN-NEXT: v_cndmask_b32_e64 v7, v0, v1, s[2:3] -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s6 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v1, s14 -; GCN-NEXT: v_cndmask_b32_e64 v6, v0, v1, s[2:3] -; GCN-NEXT: s_lshl_b64 s[2:3], s[8:9], s16 -; GCN-NEXT: v_mov_b32_e32 v0, s3 -; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, s2 -; GCN-NEXT: s_lshl_b64 s[2:3], s[12:13], s20 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] -; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] -; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GCN-NEXT: s_endpgm +; GCN-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x8 +; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v10, 16 +; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v11, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_sub_i32 s6, 64, s16 +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 +; GCN-NEXT: s_sub_i32 s4, s16, 64 +; GCN-NEXT: s_lshr_b64 s[6:7], s[8:9], s6 +; GCN-NEXT: 
s_lshl_b64 s[24:25], s[10:11], s16 +; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[16:17], s[18:19] +; GCN-NEXT: s_lshl_b64 s[4:5], s[8:9], s4 +; GCN-NEXT: s_or_b64 s[6:7], s[24:25], s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: v_cndmask_b32_e64 v3, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: s_sub_i32 s6, 64, s20 +; GCN-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[20:21], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[22:23], 0 +; GCN-NEXT: s_sub_i32 s4, s20, 64 +; GCN-NEXT: s_lshr_b64 s[6:7], s[12:13], s6 +; GCN-NEXT: s_lshl_b64 s[10:11], s[14:15], s20 +; GCN-NEXT: s_lshl_b64 s[4:5], s[12:13], s4 +; GCN-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[20:21], s[22:23] +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, s15 +; GCN-NEXT: v_cndmask_b32_e64 v7, v0, v1, s[2:3] +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, s14 +; GCN-NEXT: v_cndmask_b32_e64 v6, v0, v1, s[2:3] +; GCN-NEXT: s_lshl_b64 s[2:3], s[8:9], s16 +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_lshl_b64 s[2:3], s[12:13], s20 +; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v4, s2 +; GCN-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v9, 0 +; GCN-NEXT: 
v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCN-NEXT: s_endpgm %shift = shl <2 x i128> %lhs, %rhs store <2 x i128> %shift, <2 x i128> addrspace(1)* null ret void @@ -516,68 +516,68 @@ define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) { define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { ; GCN-LABEL: s_lshr_v2i128_ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GCN-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x8 -; GCN-NEXT: v_mov_b32_e32 v10, 16 -; GCN-NEXT: v_mov_b32_e32 v8, 0 -; GCN-NEXT: v_mov_b32_e32 v11, 0 -; GCN-NEXT: v_mov_b32_e32 v9, 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s6, 64, s16 -; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 -; GCN-NEXT: s_sub_i32 s4, s16, 64 -; GCN-NEXT: s_lshl_b64 s[6:7], s[10:11], s6 -; GCN-NEXT: s_lshr_b64 s[24:25], s[8:9], s16 -; GCN-NEXT: s_or_b64 s[6:7], s[24:25], s[6:7] -; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[0:1], s[16:17], s[18:19] -; GCN-NEXT: s_lshr_b64 s[4:5], s[10:11], s4 -; GCN-NEXT: v_mov_b32_e32 v0, s5 -; GCN-NEXT: v_mov_b32_e32 v1, s7 -; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s9 -; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s8 -; GCN-NEXT: s_sub_i32 s6, 64, s20 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[20:21], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[22:23], 0 -; GCN-NEXT: s_sub_i32 s4, s20, 64 -; GCN-NEXT: s_lshl_b64 s[6:7], s[14:15], s6 -; GCN-NEXT: s_lshr_b64 s[8:9], s[12:13], s20 -; GCN-NEXT: s_lshr_b64 s[4:5], s[14:15], s4 -; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] -; 
GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[2:3], s[20:21], s[22:23] -; GCN-NEXT: v_mov_b32_e32 v2, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s7 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v3, s13 -; GCN-NEXT: v_cndmask_b32_e64 v5, v2, v3, s[2:3] -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s6 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v3, s12 -; GCN-NEXT: v_cndmask_b32_e64 v4, v2, v3, s[2:3] -; GCN-NEXT: s_lshr_b64 s[2:3], s[10:11], s16 -; GCN-NEXT: v_mov_b32_e32 v2, s3 -; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: s_lshr_b64 s[2:3], s[14:15], s20 -; GCN-NEXT: v_mov_b32_e32 v6, s3 -; GCN-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v6, s2 -; GCN-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1] -; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] -; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GCN-NEXT: s_endpgm +; GCN-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x8 +; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v10, 16 +; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v11, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_sub_i32 s6, 64, s16 +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 +; GCN-NEXT: s_sub_i32 s4, s16, 64 +; GCN-NEXT: s_lshl_b64 s[6:7], s[10:11], s6 +; GCN-NEXT: s_lshr_b64 s[24:25], s[8:9], s16 +; GCN-NEXT: s_or_b64 s[6:7], s[24:25], s[6:7] +; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[16:17], s[18:19] +; GCN-NEXT: s_lshr_b64 s[4:5], s[10:11], s4 +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: v_cndmask_b32_e64 v1, v0, 
v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s8 +; GCN-NEXT: s_sub_i32 s6, 64, s20 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[20:21], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[22:23], 0 +; GCN-NEXT: s_sub_i32 s4, s20, 64 +; GCN-NEXT: s_lshl_b64 s[6:7], s[14:15], s6 +; GCN-NEXT: s_lshr_b64 s[8:9], s[12:13], s20 +; GCN-NEXT: s_lshr_b64 s[4:5], s[14:15], s4 +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[20:21], s[22:23] +; GCN-NEXT: v_mov_b32_e32 v2, s5 +; GCN-NEXT: v_mov_b32_e32 v3, s7 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s13 +; GCN-NEXT: v_cndmask_b32_e64 v5, v2, v3, s[2:3] +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s12 +; GCN-NEXT: v_cndmask_b32_e64 v4, v2, v3, s[2:3] +; GCN-NEXT: s_lshr_b64 s[2:3], s[10:11], s16 +; GCN-NEXT: v_mov_b32_e32 v2, s3 +; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: s_lshr_b64 s[2:3], s[14:15], s20 +; GCN-NEXT: v_mov_b32_e32 v6, s3 +; GCN-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v6, s2 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v9, 0 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCN-NEXT: s_endpgm %shift = lshr <2 x i128> %lhs, %rhs store <2 x i128> %shift, <2 x i128> addrspace(1)* null ret void @@ -586,72 +586,72 @@ define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { ; GCN-LABEL: s_ashr_v2i128_ss: ; GCN: 
; %bb.0: -; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GCN-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x8 -; GCN-NEXT: v_mov_b32_e32 v8, 0 -; GCN-NEXT: v_mov_b32_e32 v9, 0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s6, 64, s16 -; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 -; GCN-NEXT: s_sub_i32 s4, s16, 64 -; GCN-NEXT: s_lshl_b64 s[6:7], s[10:11], s6 -; GCN-NEXT: s_lshr_b64 s[24:25], s[8:9], s16 -; GCN-NEXT: s_or_b64 s[6:7], s[24:25], s[6:7] -; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[0:1], s[16:17], s[18:19] -; GCN-NEXT: s_ashr_i64 s[4:5], s[10:11], s4 -; GCN-NEXT: v_mov_b32_e32 v0, s5 -; GCN-NEXT: v_mov_b32_e32 v1, s7 -; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s9 -; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s8 -; GCN-NEXT: s_sub_i32 s6, 64, s20 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[20:21], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[22:23], 0 -; GCN-NEXT: s_sub_i32 s4, s20, 64 -; GCN-NEXT: s_lshl_b64 s[6:7], s[14:15], s6 -; GCN-NEXT: s_lshr_b64 s[8:9], s[12:13], s20 -; GCN-NEXT: s_ashr_i64 s[4:5], s[14:15], s4 -; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] -; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[2:3], s[20:21], s[22:23] -; GCN-NEXT: v_mov_b32_e32 v2, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s7 -; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v3, s13 -; GCN-NEXT: v_cndmask_b32_e64 v5, v2, v3, s[2:3] -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s6 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v3, s12 -; GCN-NEXT: v_cndmask_b32_e64 v4, v2, v3, 
s[2:3] -; GCN-NEXT: s_ashr_i64 s[2:3], s[10:11], s16 -; GCN-NEXT: s_ashr_i32 s4, s11, 31 -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s3 -; GCN-NEXT: v_mov_b32_e32 v6, s2 -; GCN-NEXT: s_ashr_i64 s[2:3], s[14:15], s20 -; GCN-NEXT: s_ashr_i32 s4, s15, 31 -; GCN-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc -; GCN-NEXT: v_mov_b32_e32 v6, s4 -; GCN-NEXT: v_mov_b32_e32 v7, s3 -; GCN-NEXT: v_mov_b32_e32 v10, s2 -; GCN-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v10, 16 -; GCN-NEXT: v_mov_b32_e32 v11, 0 -; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] -; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GCN-NEXT: s_endpgm +; GCN-NEXT: s_load_dwordx8 s[16:23], s[4:5], 0x8 +; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v9, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_sub_i32 s6, 64, s16 +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 +; GCN-NEXT: s_sub_i32 s4, s16, 64 +; GCN-NEXT: s_lshl_b64 s[6:7], s[10:11], s6 +; GCN-NEXT: s_lshr_b64 s[24:25], s[8:9], s16 +; GCN-NEXT: s_or_b64 s[6:7], s[24:25], s[6:7] +; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[16:17], s[18:19] +; GCN-NEXT: s_ashr_i64 s[4:5], s[10:11], s4 +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s8 +; GCN-NEXT: s_sub_i32 s6, 64, s20 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[20:21], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[22:23], 0 +; GCN-NEXT: 
s_sub_i32 s4, s20, 64 +; GCN-NEXT: s_lshl_b64 s[6:7], s[14:15], s6 +; GCN-NEXT: s_lshr_b64 s[8:9], s[12:13], s20 +; GCN-NEXT: s_ashr_i64 s[4:5], s[14:15], s4 +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[20:21], s[22:23] +; GCN-NEXT: v_mov_b32_e32 v2, s5 +; GCN-NEXT: v_mov_b32_e32 v3, s7 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s13 +; GCN-NEXT: v_cndmask_b32_e64 v5, v2, v3, s[2:3] +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s12 +; GCN-NEXT: v_cndmask_b32_e64 v4, v2, v3, s[2:3] +; GCN-NEXT: s_ashr_i64 s[2:3], s[10:11], s16 +; GCN-NEXT: s_ashr_i32 s4, s11, 31 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: v_mov_b32_e32 v6, s2 +; GCN-NEXT: s_ashr_i64 s[2:3], s[14:15], s20 +; GCN-NEXT: s_ashr_i32 s4, s15, 31 +; GCN-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GCN-NEXT: v_mov_b32_e32 v6, s4 +; GCN-NEXT: v_mov_b32_e32 v7, s3 +; GCN-NEXT: v_mov_b32_e32 v10, s2 +; GCN-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v10, 16 +; GCN-NEXT: v_mov_b32_e32 v11, 0 +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCN-NEXT: s_endpgm %shift = ashr <2 x i128> %lhs, %rhs store <2 x i128> %shift, <2 x i128> addrspace(1)* null ret void diff --git a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll index b000e9449e7eb..cdaff4f17c683 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll @@ -54,8 +54,8 @@ define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* ; after 64-bit shift is split. 
; GCN-LABEL: {{^}}lshr_and_i64_35: -; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_load_dword v[[LO:[0-9]+]] +; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]] ; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[LO]], 8, 23 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll index da15cff4c91a1..dfb8874ac019c 100644 --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -13,14 +13,14 @@ define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshl_b32_e32 v1, v1, v3 ; GCN-NEXT: v_lshl_b32_e32 v0, v0, v2 @@ -59,15 +59,15 @@ define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: 
s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 -; GCN-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshl_b32_e32 v3, v3, v7 ; GCN-NEXT: v_lshl_b32_e32 v2, v2, v6 @@ -411,23 +411,23 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add ; GCN-NEXT: s_mov_b32 s8, s6 ; GCN-NEXT: s_mov_b32 s9, s7 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: s_mov_b64 s[12:13], s[6:7] ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_mov_b32 s14, 0 ; GCN-NEXT: s_mov_b32 s15, s3 -; GCN-NEXT: s_mov_b64 s[12:13], s[6:7] ; GCN-NEXT: buffer_load_dword v2, off, s[8:11], 0 ; GCN-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 offset:4 +; GCN-NEXT: s_mov_b32 s6, 0xffff ; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_mov_b32 s4, 0xffff ; GCN-NEXT: s_mov_b32 s1, s5 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GCN-NEXT: v_and_b32_e32 v0, s4, v0 +; GCN-NEXT: v_and_b32_e32 v0, s6, v0 ; GCN-NEXT: v_lshl_b32_e32 v0, v2, v0 ; GCN-NEXT: v_lshl_b32_e32 v1, v1, v3 -; GCN-NEXT: v_and_b32_e32 v0, s4, v0 +; GCN-NEXT: v_and_b32_e32 v0, s6, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -490,14 +490,14 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add ; GCN-NEXT: s_mov_b64 s[0:1], s[6:7] ; GCN-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 ; GCN-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 -; GCN-NEXT: s_mov_b32 s8, 0xffff +; GCN-NEXT: s_mov_b32 s0, 0xffff ; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v8, s8, v4 +; GCN-NEXT: v_and_b32_e32 v8, s0, v4 ; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; 
GCN-NEXT: v_and_b32_e32 v9, s8, v5 +; GCN-NEXT: v_and_b32_e32 v9, s0, v5 ; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; GCN-NEXT: v_lshl_b32_e32 v5, v7, v5 @@ -505,9 +505,9 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add ; GCN-NEXT: v_lshl_b32_e32 v4, v6, v4 ; GCN-NEXT: v_lshl_b32_e32 v2, v2, v8 ; GCN-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GCN-NEXT: v_and_b32_e32 v3, s8, v3 +; GCN-NEXT: v_and_b32_e32 v3, s0, v3 ; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GCN-NEXT: v_and_b32_e32 v2, s8, v2 +; GCN-NEXT: v_and_b32_e32 v2, s0, v2 ; GCN-NEXT: v_or_b32_e32 v3, v3, v5 ; GCN-NEXT: v_or_b32_e32 v2, v2, v4 ; GCN-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 @@ -732,17 +732,17 @@ define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s10, s2 +; GCN-NEXT: s_mov_b32 s11, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s6 +; GCN-NEXT: s_mov_b32 s9, s7 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCN-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; GCN-NEXT: buffer_load_dwordx4 v[11:14], off, s[8:11], 0 offset:48 ; GCN-NEXT: s_mov_b32 s0, s4 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s2 -; GCN-NEXT: s_mov_b32 s7, s3 -; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 -; GCN-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 -; GCN-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 -; GCN-NEXT: buffer_load_dwordx4 v[11:14], off, s[4:7], 0 offset:48 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_lshl_b64 v[2:3], v[2:3], v10 ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll 
b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll index 97184c5a09236..43418c63057b8 100644 --- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -86,23 +86,23 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a ; VI-LABEL: v_shl_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; VI-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v5, v[0:1] +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: v_add_u32_e32 v4, vcc, 4, v0 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[4:5] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v4, v1, v0 -; VI-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v0, v4, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshlrev_b16_e32 v3, v2, v5 +; VI-NEXT: v_lshlrev_b16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: v_shl_v2i16: @@ -116,17 +116,17 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 
addr64 offset:4 -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_mov_b32 s0, 0xffff ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_and_b32_e32 v5, s8, v3 +; CI-NEXT: v_and_b32_e32 v5, s0, v3 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_lshl_b32_e32 v3, v4, v3 ; CI-NEXT: v_lshl_b32_e32 v2, v2, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm @@ -170,39 +170,39 @@ define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] +; VI-NEXT: flat_load_dword v3, v[0:1] ; VI-NEXT: s_lshr_b32 s1, s0, 16 -; VI-NEXT: v_mov_b32_e32 v4, s1 -; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v1, s0, v0 -; VI-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshlrev_b16_e32 v4, s0, v3 +; VI-NEXT: v_lshlrev_b16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v2, v4, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: shl_v_s_v2i16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; CI-NEXT: s_load_dword s0, s[0:1], 0xd -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_load_dword s8, s[0:1], 0xd ; 
CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_lshr_b32 s9, s0, 16 -; CI-NEXT: s_and_b32 s10, s0, s8 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 +; CI-NEXT: s_mov_b32 s0, 0xffff +; CI-NEXT: s_lshr_b32 s1, s8, 16 +; CI-NEXT: s_and_b32 s8, s8, s0 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; CI-NEXT: v_lshlrev_b32_e32 v2, s10, v2 -; CI-NEXT: v_lshlrev_b32_e32 v3, s9, v3 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 +; CI-NEXT: v_lshlrev_b32_e32 v2, s8, v2 +; CI-NEXT: v_lshlrev_b32_e32 v3, s1, v3 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 @@ -245,17 +245,17 @@ define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] +; VI-NEXT: flat_load_dword v3, v[0:1] ; VI-NEXT: s_lshr_b32 s1, s0, 16 -; VI-NEXT: v_mov_b32_e32 v4, s1 -; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b16_e64 v1, v0, s0 -; VI-NEXT: v_lshlrev_b16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshlrev_b16_e64 v4, v3, s0 +; VI-NEXT: v_lshlrev_b16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v4, v2 
+; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: shl_s_v_v2i16: @@ -270,12 +270,12 @@ define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_mov_b32 s0, 0xffff -; CI-NEXT: s_lshr_b32 s9, s8, 16 +; CI-NEXT: s_lshr_b32 s1, s8, 16 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_and_b32_e32 v3, s0, v2 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; CI-NEXT: v_lshl_b32_e32 v2, s9, v2 +; CI-NEXT: v_lshl_b32_e32 v2, s1, v2 ; CI-NEXT: v_lshl_b32_e32 v3, s8, v3 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; CI-NEXT: v_and_b32_e32 v3, s0, v3 @@ -319,15 +319,15 @@ define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i1 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b16_e64 v1, v0, 8 -; VI-NEXT: v_lshlrev_b16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshlrev_b16_e64 v2, v3, 8 +; VI-NEXT: v_lshlrev_b16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: shl_imm_v_v2i16: @@ -387,16 +387,16 @@ define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i1 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: 
v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v0 -; VI-NEXT: v_and_b32_e32 v1, 0xff000000, v1 -; VI-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3 +; VI-NEXT: v_and_b32_e32 v2, 0xff000000, v2 +; VI-NEXT: v_lshlrev_b16_e32 v3, 8, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: shl_v_imm_v2i16: @@ -429,45 +429,45 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a ; GFX9-LABEL: v_shl_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:8 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s0, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_lshlrev_b16 v1, v1, v5 -; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, v4 -; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: v_pk_lshlrev_b16 v1, v1, v3 +; 
GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, v2 +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_shl_v4i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; VI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v0 -; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v6, v5, v1 -; VI-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_lshlrev_b16_e32 v5, v4, v0 -; VI-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshlrev_b16_e32 v6, v3, v1 +; VI-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshlrev_b16_e32 v3, v2, v0 +; VI-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NEXT: v_or_b32_e32 v1, v6, v1 -; VI-NEXT: v_or_b32_e32 v0, v5, v0 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: v_or_b32_e32 v0, v3, v0 +; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; VI-NEXT: s_endpgm ; ; CI-LABEL: v_shl_v4i16: @@ -481,14 +481,14 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> 
addrspace(1)* %out, <4 x i16> a ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 ; CI-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 -; CI-NEXT: s_mov_b32 s8, 0xffff +; CI-NEXT: s_mov_b32 s0, 0xffff ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_and_b32_e32 v8, s8, v4 +; CI-NEXT: v_and_b32_e32 v8, s0, v4 ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; CI-NEXT: v_and_b32_e32 v9, s8, v5 +; CI-NEXT: v_and_b32_e32 v9, s0, v5 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_lshl_b32_e32 v5, v7, v5 @@ -496,9 +496,9 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a ; CI-NEXT: v_lshl_b32_e32 v4, v6, v4 ; CI-NEXT: v_lshl_b32_e32 v2, v2, v8 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; CI-NEXT: v_and_b32_e32 v3, s8, v3 +; CI-NEXT: v_and_b32_e32 v3, s0, v3 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; CI-NEXT: v_and_b32_e32 v2, s8, v2 +; CI-NEXT: v_and_b32_e32 v2, s0, v2 ; CI-NEXT: v_or_b32_e32 v3, v3, v5 ; CI-NEXT: v_or_b32_e32 v2, v2, v4 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 @@ -539,21 +539,21 @@ define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i1 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_mov_b32 s0, 0xff000000 +; VI-NEXT: s_mov_b32 s2, 0xff000000 ; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v4, 8, v1 ; VI-NEXT: 
v_lshlrev_b16_e32 v5, 8, v0 ; VI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; VI-NEXT: v_and_b32_e32 v0, s0, v0 +; VI-NEXT: v_and_b32_e32 v0, s2, v0 ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: v_and_b32_e32 v4, s0, v4 +; VI-NEXT: v_and_b32_e32 v4, s2, v4 ; VI-NEXT: v_or_b32_e32 v1, v1, v4 ; VI-NEXT: v_or_b32_e32 v0, v5, v0 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -569,14 +569,14 @@ define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 -; CI-NEXT: s_mov_b32 s8, 0xff00 +; CI-NEXT: s_mov_b32 s0, 0xff00 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v4, 8, v3 ; CI-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; CI-NEXT: v_and_b32_e32 v4, s8, v4 +; CI-NEXT: v_and_b32_e32 v4, s0, v4 ; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; CI-NEXT: v_and_b32_e32 v3, s8, v3 +; CI-NEXT: v_and_b32_e32 v3, s0, v3 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; CI-NEXT: v_or_b32_e32 v3, v3, v4 ; CI-NEXT: v_and_b32_e32 v2, 0xff00ff00, v2 diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll index ff4a8296d8dd0..95191583249ce 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -112,17 +112,17 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] ; VI-NEXT: flat_load_dword v4, v[0:1] -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt 
vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_subrev_u32_e32 v1, vcc, 64, v4 +; VI-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_subrev_u32_e32 v0, vcc, 64, v0 -; VI-NEXT: flat_store_dword v[2:3], v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, 64, v4 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: flat_store_dword v[0:1], v3 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_i32_x_sub_64_multi_use: @@ -133,17 +133,17 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_dword v3, v[0:1], off ; GFX9-NEXT: global_load_dword v4, v[0:1], off -; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_subrev_u32_e32 v1, 64, v4 +; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_subrev_u32_e32 v0, 64, v0 -; GFX9-NEXT: global_store_dword v[2:3], v1, off -; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: v_subrev_u32_e32 v3, 64, v4 +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: global_store_dword v[0:1], v3, off ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: v_test_i32_x_sub_64_multi_use: @@ -945,17 +945,17 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_ushort v3, v[0:1] ; VI-NEXT: flat_load_ushort v4, v[0:1] -; VI-NEXT: flat_load_ushort v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: 
v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_subrev_u16_e32 v1, 64, v4 +; VI-NEXT: v_subrev_u16_e32 v2, 64, v3 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_subrev_u16_e32 v0, 64, v0 -; VI-NEXT: flat_store_short v[2:3], v1 -; VI-NEXT: flat_store_short v[2:3], v0 +; VI-NEXT: v_subrev_u16_e32 v3, 64, v4 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: flat_store_short v[0:1], v3 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_i16_x_sub_64_multi_use: @@ -966,17 +966,17 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ushort v3, v[0:1], off ; GFX9-NEXT: global_load_ushort v4, v[0:1], off -; GFX9-NEXT: global_load_ushort v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v4 +; GFX9-NEXT: v_subrev_u16_e32 v2, 64, v3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_subrev_u16_e32 v0, 64, v0 -; GFX9-NEXT: global_store_short v[2:3], v1, off -; GFX9-NEXT: global_store_short v[2:3], v0, off +; GFX9-NEXT: v_subrev_u16_e32 v3, 64, v4 +; GFX9-NEXT: global_store_short v[0:1], v2, off +; GFX9-NEXT: global_store_short v[0:1], v3, off ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: v_test_i16_x_sub_64_multi_use: @@ -1037,20 +1037,20 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(<2 x i16> addrspace(1)* %out ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: 
v_lshlrev_b32_e32 v2, 2, v0 +; VI-NEXT: v_mov_b32_e32 v4, 64 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v1, 64 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_sub_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_subrev_u16_e32 v0, 64, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_sub_u16_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_subrev_u16_e32 v3, 64, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_sub_64_64: @@ -1125,15 +1125,15 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(<2 x i16> addrspace(1)* %out, ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, -7, v0 -; VI-NEXT: v_sub_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_e32 v2, -7, v3 +; VI-NEXT: v_sub_u16_sdwa v3, v3, v4 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_sub_7_64: @@ -1204,20 +1204,20 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(<2 x i16> addrspace(1)* %ou ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; VI-NEXT: v_mov_b32_e32 v4, 0xffffff85 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v1, 0xffffff85 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_subrev_u16_e32 v0, 64, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_subrev_u16_e32 v3, 64, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_sub_64_123: @@ -1292,15 +1292,15 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(<2 x i16> addrspace(1)* %out, ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: 
v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 -; VI-NEXT: v_add_u16_e32 v0, -7, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 +; VI-NEXT: v_add_u16_e32 v3, -7, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_sub_7_0: @@ -1608,20 +1608,20 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(<2 x i16> addrspace(1) ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; VI-NEXT: v_mov_b32_e32 v4, 32 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v1, 32 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_sub_u16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_subrev_u16_e32 v0, 32, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_sub_u16_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_subrev_u16_e32 v3, 32, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg32_neg32: @@ -1772,15 +1772,15 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(<2 x i16> addrspace(1)* %o ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, 
v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 -; VI-NEXT: v_subrev_u16_e32 v0, 32, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 +; VI-NEXT: v_subrev_u16_e32 v3, 32, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg32_0: @@ -1856,15 +1856,15 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(<2 x i16> addrspace(1) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, -16, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_e32 v2, -16, v3 +; VI-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg16_neg16: @@ -2015,15 +2015,15 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(<2 x i16> addrspace(1)* %o ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: 
v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 -; VI-NEXT: v_add_u16_e32 v0, -16, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 +; VI-NEXT: v_add_u16_e32 v3, -16, v3 +; VI-NEXT: v_or_b32_e32 v2, v3, v2 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg16_0: @@ -2094,20 +2094,20 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(<2 x i16> addrspace(1)* ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: s_movk_i32 s2, 0xc400 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_movk_i32 s0, 0xc400 -; VI-NEXT: v_mov_b32_e32 v4, s0 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, s0, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_e32 v2, s2, v3 +; VI-NEXT: v_add_u16_sdwa v3, v3, v4 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg_fpone: @@ -2179,20 +2179,20 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(<2 x i16> addrspace(1 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: s_movk_i32 s2, 0x4400 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_movk_i32 s0, 0x4400 -; VI-NEXT: v_mov_b32_e32 v4, s0 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, s0, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_e32 v2, s2, v3 +; VI-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg_negfpone: @@ -2264,20 +2264,20 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(<2 x i16> addrspace(1)* ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v3, 
v[0:1] +; VI-NEXT: s_movk_i32 s2, 0x4000 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_movk_i32 s0, 0x4000 -; VI-NEXT: v_mov_b32_e32 v4, s0 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, s0, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_e32 v2, s2, v3 +; VI-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg_fptwo: @@ -2349,20 +2349,20 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(<2 x i16> addrspace(1 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v3, v[0:1] +; VI-NEXT: s_movk_i32 s2, 0xc000 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 +; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_movk_i32 s0, 0xc000 -; VI-NEXT: v_mov_b32_e32 v4, s0 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, s0, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: 
v_or_b32_e32 v0, v1, v0 -; VI-NEXT: flat_store_dword v[2:3], v0 +; VI-NEXT: v_add_u16_e32 v2, s2, v3 +; VI-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: v_test_v2i16_x_add_neg_negfptwo: diff --git a/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir b/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir index 0eaa485903a79..81b304b7fa9c9 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir @@ -8,7 +8,7 @@ ... # GCN-LABEL: name: shrink_add_vop3{{$}} -# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %19, %17, 0, implicit $exec +# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_CO_U32_e64 %19, %17, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_add_vop3 alignment: 1 @@ -83,7 +83,7 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec - %29, %9 = V_ADD_I32_e64 %19, %17, 0, implicit $exec + %29, %9 = V_ADD_CO_U32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -91,7 +91,7 @@ body: | ... 
--- # GCN-LABEL: name: shrink_sub_vop3{{$}} -# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_I32_e64 %19, %17, 0, implicit $exec +# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_CO_U32_e64 %19, %17, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_sub_vop3 @@ -167,7 +167,7 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec - %29, %9 = V_SUB_I32_e64 %19, %17, 0, implicit $exec + %29, %9 = V_SUB_CO_U32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -175,7 +175,7 @@ body: | ... --- # GCN-LABEL: name: shrink_subrev_vop3{{$}} -# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec +# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_CO_U32_e64 %19, %17, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_subrev_vop3 @@ -251,7 +251,7 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec - %29, %9 = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec + %29, %9 = V_SUBREV_CO_U32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll b/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll index 927a0d9daab3f..df8366a722e93 100644 --- a/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll @@ 
-1,9 +1,9 @@ ; RUN: llc -o - %s -march=amdgcn -mcpu=verde -verify-machineinstrs -stop-after finalize-isel | FileCheck %s ; This test verifies that the instruction selection will add the implicit ; register operands in the correct order when modifying the opcode of an -; instruction to V_ADD_I32_e32. +; instruction to V_ADD_CO_U32_e32. -; CHECK: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def $vcc, implicit $exec +; CHECK: %{{[0-9]+}}:vgpr_32 = V_ADD_CO_U32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def $vcc, implicit $exec define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll index 1a585f8b39be8..d263be2dcf820 100644 --- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll +++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll @@ -399,14 +399,14 @@ define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addr ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 ; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s6 +; SI-NEXT: s_mov_b32 s9, s7 +; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s4, s6 -; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s6, s2 -; SI-NEXT: s_mov_b32 s7, s3 -; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_ashrrev_i32_e32 v1, 24, v0 ; SI-NEXT: v_bfe_i32 v2, v0, 16, 8 @@ -423,14 +423,14 @@ define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addr ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_mov_b32 s11, s3 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s6 +; VI-NEXT: s_mov_b32 s9, s7 +; VI-NEXT: buffer_load_dword v0, 
off, s[8:11], 0 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_lshrrev_b16_e32 v1, 8, v0 ; VI-NEXT: v_ashrrev_i32_e32 v2, 24, v0 @@ -523,14 +523,14 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 add ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 ; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s6 +; SI-NEXT: s_mov_b32 s9, s7 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s4, s6 -; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s6, s2 -; SI-NEXT: s_mov_b32 s7, s3 -; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_ashr_i64 v[2:3], v[0:1], 48 ; SI-NEXT: v_ashrrev_i32_e32 v3, 16, v0 @@ -547,14 +547,14 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 add ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_mov_b32 s11, s3 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s6 +; VI-NEXT: s_mov_b32 s9, s7 +; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_ashrrev_i32_e32 v3, 16, v0 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir index 
9574edd0af98b..3daf2b88943f6 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir +++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir @@ -13,7 +13,7 @@ body: | ; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; CHECK: S_WAITCNT 127 ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec - ; CHECK: renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec ; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) @@ -59,7 +59,7 @@ body: | renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec S_WAITCNT 127 $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec - renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec + renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir index f62cb869fdf04..e855bbe1f1818 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir +++ b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir @@ -17,7 +17,7 @@ body: | 
%5:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %6:sreg_32_xm0 = S_MOV_B32 4095 %8:vgpr_32 = COPY %6 - %7:vgpr_32 = V_ADD_I32_e32 %4, killed %8, implicit-def dead $vcc, implicit $exec + %7:vgpr_32 = V_ADD_CO_U32_e32 %4, killed %8, implicit-def dead $vcc, implicit $exec %10:sreg_32 = COPY %7 %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0 $vgpr0 = COPY %9 diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll index 51ed07aa2964b..b7d4829b0426c 100644 --- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll @@ -314,13 +314,13 @@ entry: ; GFX803: v_add{{(_co)?}}_{{i|u}}32_e32 ; GFX803: v_addc_u32_e32 -; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff802, v ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v ; GFX906-DAG: v_lshrrev_b32_e32 -; GFX906: flat_store_short v[0:1], v2 offset:2050{{$}} +; GFX906: flat_store_short v[0:1], v2{{$}} -; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:2050{{$}} +; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}} ; GFX803: flat_store_short v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 @@ -363,13 +363,13 @@ entry: ; GFX803-DAG: v_add_u32_e32 ; GFX803-DAG: v_addc_u32_e32 -; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff001, v ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v{{[0-9]+}}, vcc -; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:1{{$}} +; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}} ; GFX906-DAG: v_lshrrev_b32_e32 v2, 16, v2 -; GFX906: flat_store_byte v[0:1], v2 offset:1{{$}} +; GFX906: flat_store_byte v[0:1], v2{{$}} ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2 ; GFX803: flat_store_byte v[0:1], v2{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll 
b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll index 9c673c74248b9..2b8eba5f90149 100644 --- a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll +++ b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -6,14 +6,14 @@ define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 { ; CIVI-LABEL: local_store_i56: ; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; CIVI-NEXT: s_mov_b32 m0, -1 -; CIVI-NEXT: ds_write_b8 v0, v3 offset:6 -; CIVI-NEXT: ds_write_b16 v0, v2 offset:4 -; CIVI-NEXT: ds_write_b32 v0, v1 -; CIVI-NEXT: s_waitcnt lgkmcnt(0) -; CIVI-NEXT: s_setpc_b64 s[30:31] +; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIVI-NEXT: s_mov_b32 m0, -1 +; CIVI-NEXT: ds_write_b16 v0, v2 offset:4 +; CIVI-NEXT: ds_write_b32 v0, v1 +; CIVI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; CIVI-NEXT: ds_write_b8 v0, v1 offset:6 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: local_store_i56: ; GFX9: ; %bb.0: @@ -30,70 +30,70 @@ define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 { define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 { ; HAWAII-LABEL: local_store_i55: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_or_b32 s0, s4, 14 -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v1, s5 -; HAWAII-NEXT: flat_load_ubyte v0, v[0:1] -; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v1, s0 -; HAWAII-NEXT: v_mov_b32_e32 v2, s1 -; HAWAII-NEXT: v_mov_b32_e32 v3, s2 -; HAWAII-NEXT: s_waitcnt vmcnt(0) -; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6 -; HAWAII-NEXT: ds_write_b16 v1, v3 offset:4 -; HAWAII-NEXT: ds_write_b32 v1, v2 -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_or_b32 s0, s4, 14 +; HAWAII-NEXT: v_mov_b32_e32 
v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s5 +; HAWAII-NEXT: flat_load_ubyte v0, v[0:1] +; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v1, s0 +; HAWAII-NEXT: v_mov_b32_e32 v3, s1 +; HAWAII-NEXT: v_mov_b32_e32 v2, s2 +; HAWAII-NEXT: ds_write_b16 v1, v2 offset:4 +; HAWAII-NEXT: s_waitcnt vmcnt(0) +; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6 +; HAWAII-NEXT: ds_write_b32 v1, v3 +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i55: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_or_b32 s0, s4, 14 -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v1, s5 -; FIJI-NEXT: flat_load_ubyte v0, v[0:1] -; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 -; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v1, s0 -; FIJI-NEXT: v_mov_b32_e32 v3, s1 -; FIJI-NEXT: s_and_b32 s3, s2, 0xffff -; FIJI-NEXT: v_mov_b32_e32 v2, s2 -; FIJI-NEXT: s_waitcnt vmcnt(0) -; FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; FIJI-NEXT: v_or_b32_e32 v0, s3, v0 -; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7 -; FIJI-NEXT: ds_write_b8 v1, v0 offset:6 -; FIJI-NEXT: ds_write_b16 v1, v2 offset:4 -; FIJI-NEXT: ds_write_b32 v1, v3 -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_or_b32 s0, s4, 14 +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 v1, s5 +; FIJI-NEXT: flat_load_ubyte v0, v[0:1] +; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 +; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v1, s0 +; FIJI-NEXT: v_mov_b32_e32 v3, s1 +; FIJI-NEXT: s_and_b32 s3, s2, 0xffff +; FIJI-NEXT: v_mov_b32_e32 v2, s2 +; FIJI-NEXT: ds_write_b16 v1, v2 offset:4 +; FIJI-NEXT: 
s_waitcnt vmcnt(0) +; FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; FIJI-NEXT: v_or_b32_e32 v0, s3, v0 +; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7 +; FIJI-NEXT: ds_write_b8 v1, v0 offset:6 +; FIJI-NEXT: ds_write_b32 v1, v3 +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i55: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:14 -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: s_and_b32 s3, s2, 0xffff -; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v2, s3, v2 -; GFX9-NEXT: v_and_b32_e32 v2, 0x7fffff, v2 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:6 -; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 -; GFX9-NEXT: ds_write_b32 v0, v3 -; GFX9-NEXT: s_endpgm +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:14 +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX9-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: s_and_b32 s3, s2, 0xffff +; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_or_b32_e32 v1, s3, v2 +; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffff, v1 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:6 +; GFX9-NEXT: ds_write_b32 v0, v3 +; GFX9-NEXT: s_endpgm store i55 %arg, i55 addrspace(3)* %ptr, align 8 ret void } @@ -101,31 +101,31 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, 
i48 %arg) #0 { ; HAWAII-LABEL: local_store_i48: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v2, s1 -; HAWAII-NEXT: v_mov_b32_e32 v1, s2 -; HAWAII-NEXT: ds_write_b16 v0, v1 offset:4 -; HAWAII-NEXT: ds_write_b32 v0, v2 -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s2 +; HAWAII-NEXT: ds_write_b16 v0, v1 offset:4 +; HAWAII-NEXT: v_mov_b32_e32 v1, s1 +; HAWAII-NEXT: ds_write_b32 v0, v1 +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i48: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 -; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v2, s1 -; FIJI-NEXT: v_mov_b32_e32 v1, s2 -; FIJI-NEXT: ds_write_b16 v0, v1 offset:4 -; FIJI-NEXT: ds_write_b32 v0, v2 -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 +; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 v1, s2 +; FIJI-NEXT: ds_write_b16 v0, v1 offset:4 +; FIJI-NEXT: v_mov_b32_e32 v1, s1 +; FIJI-NEXT: ds_write_b32 v0, v1 +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i48: ; GFX9: ; %bb.0: @@ -146,35 +146,35 @@ define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 { ; 
HAWAII-LABEL: local_store_i65: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s3, s[4:5], 0x4 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v2, s2 -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: s_and_b32 s3, s3, 1 -; HAWAII-NEXT: v_mov_b32_e32 v3, s3 -; HAWAII-NEXT: v_mov_b32_e32 v1, s1 -; HAWAII-NEXT: ds_write_b8 v2, v3 offset:8 -; HAWAII-NEXT: ds_write_b64 v2, v[0:1] -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s3, s[4:5], 0x4 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v2, s2 +; HAWAII-NEXT: s_and_b32 s3, s3, 1 +; HAWAII-NEXT: v_mov_b32_e32 v0, s3 +; HAWAII-NEXT: ds_write_b8 v2, v0 offset:8 +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s1 +; HAWAII-NEXT: ds_write_b64 v2, v[0:1] +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i65: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_load_dword s2, s[4:5], 0x0 -; FIJI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s3, s[4:5], 0x10 -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v2, s2 -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: s_and_b32 s3, s3, 1 -; FIJI-NEXT: v_mov_b32_e32 v3, s3 -; FIJI-NEXT: v_mov_b32_e32 v1, s1 -; FIJI-NEXT: ds_write_b8 v2, v3 offset:8 -; FIJI-NEXT: ds_write_b64 v2, v[0:1] -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_load_dword s2, s[4:5], 0x0 +; FIJI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s3, s[4:5], 0x10 +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v2, s2 +; FIJI-NEXT: s_and_b32 s3, s3, 1 +; FIJI-NEXT: v_mov_b32_e32 v0, s3 +; FIJI-NEXT: ds_write_b8 v2, v0 offset:8 +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 
v1, s1 +; FIJI-NEXT: ds_write_b64 v2, v[0:1] +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i65: ; GFX9: ; %bb.0: @@ -218,22 +218,22 @@ define void @local_store_i13(i13 addrspace(3)* %ptr, i13 %arg) #0 { define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 { ; CIVI-LABEL: local_store_i17: ; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: s_mov_b32 m0, -1 -; CIVI-NEXT: v_bfe_u32 v2, v1, 16, 1 -; CIVI-NEXT: ds_write_b16 v0, v1 -; CIVI-NEXT: ds_write_b8 v0, v2 offset:2 -; CIVI-NEXT: s_waitcnt lgkmcnt(0) -; CIVI-NEXT: s_setpc_b64 s[30:31] +; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIVI-NEXT: s_mov_b32 m0, -1 +; CIVI-NEXT: ds_write_b16 v0, v1 +; CIVI-NEXT: v_bfe_u32 v1, v1, 16, 1 +; CIVI-NEXT: ds_write_b8 v0, v1 offset:2 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: local_store_i17: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v2, 0x1ffff, v1 -; GFX9-NEXT: ds_write_b16 v0, v1 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_write_b16 v0, v1 +; GFX9-NEXT: v_and_b32_e32 v1, 0x1ffff, v1 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store i17 %arg, i17 addrspace(3)* %ptr, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll index 06e4d0f4935eb..43536131582d2 100644 --- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -30,26 +30,24 @@ define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i ; VI-LABEL: v_test_sub_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: 
v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2 +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] ; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_sub_u16_e32 v2, v0, v1 ; VI-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid @@ -88,14 +86,14 @@ define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_load_dword s5, s[8:9], 0x0 +; VI-NEXT: s_load_dword s6, s[8:9], 0x0 +; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshr_b32 s6, s4, 16 -; VI-NEXT: s_lshr_b32 s7, s5, 16 -; VI-NEXT: s_sub_i32 s4, s4, s5 -; VI-NEXT: s_sub_i32 s5, s6, s7 +; VI-NEXT: s_lshr_b32 s5, s4, 16 +; VI-NEXT: s_lshr_b32 s7, s6, 16 +; VI-NEXT: s_sub_i32 s4, s4, s6 +; VI-NEXT: s_sub_i32 s5, s5, s7 ; VI-NEXT: s_and_b32 s4, s4, 0xffff ; VI-NEXT: s_lshl_b32 s5, s5, 16 ; VI-NEXT: s_or_b32 s4, s4, s5 @@ -185,22 +183,20 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou ; ; VI-LABEL: 
v_test_sub_v2i16_constant: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xfffffe38 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: v_mov_b32_e32 v1, 0xfffffe38 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 0xffffff85, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-NEXT: v_add_u16_e32 v2, 0xffffff85, v0 +; VI-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -235,22 +231,20 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* ; ; VI-LABEL: v_test_sub_v2i16_neg_constant: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x3df -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: v_mov_b32_e32 v1, 0x3df +; 
VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 0x34d, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-NEXT: v_add_u16_e32 v2, 0x34d, v0 +; VI-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -283,22 +277,20 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* ; ; VI-LABEL: v_test_sub_v2i16_inline_neg1: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 1 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: v_mov_b32_e32 v1, 1 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 1, v0 -; VI-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-NEXT: v_add_u16_e32 v2, 1, v0 +; VI-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -331,17 +323,15 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac 
; ; VI-LABEL: v_test_sub_v2i16_inline_lo_zero_hi: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v0 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 ; VI-NEXT: v_subrev_u16_e32 v0, 32, v0 @@ -411,50 +401,46 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1) ; GFX9-LABEL: v_test_sub_v2i16_zext_to_v2i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s8, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: global_load_dword v1, v[2:3], off -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, 
s[0:3], 0 +; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_test_sub_v2i16_zext_to_v2i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2 +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_load_dword v1, v[0:1] ; VI-NEXT: flat_load_dword v2, v[2:3] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_sub_u16_e32 v0, v1, v2 ; VI-NEXT: v_sub_u16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid @@ -473,54 +459,50 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1) ; GFX9-LABEL: v_test_sub_v2i16_zext_to_v2i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, 
s9 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, s8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: global_load_dword v1, v[4:5], off -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: global_load_dword v1, v[2:3], off +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_sub_i16 v1, v0, v1 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_test_sub_v2i16_zext_to_v2i64: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2 +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_load_dword v4, v[0:1] ; VI-NEXT: flat_load_dword v2, v[2:3] ; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: v_mov_b32_e32 v3, v1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_sub_u16_e32 v0, v4, v2 ; VI-NEXT: v_sub_u16_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: buffer_store_dwordx4 v[0:3], 
off, s[0:3], 0 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid @@ -539,52 +521,48 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1) ; GFX9-LABEL: v_test_sub_v2i16_sext_to_v2i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s8, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: global_load_dword v1, v[2:3], off -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 16, v0 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_test_sub_v2i16_sext_to_v2i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: 
v_mov_b32_e32 v3, s9 -; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2 +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] ; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_sub_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NEXT: v_sub_u16_e32 v0, v0, v1 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 ; VI-NEXT: v_bfe_i32 v1, v2, 0, 16 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid @@ -603,21 +581,19 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(<2 x i64> addrspace(1) ; GFX9-LABEL: v_test_sub_v2i16_sext_to_v2i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s8, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: global_load_dword v1, v[2:3], off -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_sub_i16 v1, v0, v1 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v2, 16, v1 @@ -625,27 +601,25 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(<2 x i64> addrspace(1) ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 16 ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_test_sub_v2i16_sext_to_v2i64: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2 +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] ; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_sub_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NEXT: v_sub_u16_e32 v0, v0, v1 @@ -653,7 +627,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(<2 x i64> addrspace(1) ; VI-NEXT: v_bfe_i32 v2, v2, 0, 16 ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll 
b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll new file mode 100644 index 0000000000000..13c4dc80be156 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s +define void @test() #1 { + ; Clean up the unreachable blocks introduced with LowerSwitch pass. + ; This test ensures that, in the pass flow, UnreachableBlockElim pass + ; follows the LowerSwitch. Otherwise, this testcase will crash + ; immediately after the instruction selection due to the incomplete + ; PHI node in an MBB whose incoming values were never codegenerated. + ; + ; GCN-LABEL: name: test + ; GCN: bb.{{[0-9]+}}.entry: + ; GCN: bb.{{[0-9]+}}.entry.true.blk: + ; GCN: bb.{{[0-9]+}}.entry.false.blk: + ; GCN: bb.{{[0-9]+}}.switch.blk: + + ; GCN-NOT: bb.{{[0-9]+}}.preheader.blk + ; GCN-NOT: bb.{{[0-9]+}}.pre.false.blk: + ; GCN-NOT: bb.{{[0-9]+}}.unreach.blk: + ; GCN-NOT: PHI + + ; GCN: bb.{{[0-9]+}}.exit: + entry: + %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0 + br i1 undef, label %entry.true.blk, label %entry.false.blk + + entry.true.blk: ; preds = %entry + %exit.cmp = icmp ult i32 %idx, 3 + br i1 %exit.cmp, label %switch.blk, label %exit + + entry.false.blk: ; preds = %entry + unreachable + + switch.blk: ; preds = %entry.true.blk + switch i32 %idx, label %preheader.blk [ + i32 0, label %exit + i32 1, label %exit + i32 2, label %exit + ] + + preheader.blk: ; preds = %switch.blk + %pre.exit = icmp ult i32 %idx, 5 + br i1 %pre.exit, label %unreach.blk, label %pre.false.blk + + pre.false.blk: ; preds = %preheader.blk + %call.pre.false = tail call i32 @func(i32 %idx) #0 + br label %unreach.blk + + unreach.blk: ; preds = %preheader.blk, %pre.false.blk + %phi.val = phi i32 [ %call.pre.false, %pre.false.blk ], [ undef, %preheader.blk ] + store i32 %phi.val, i32* undef + unreachable + + exit: ; preds = 
%switch.blk + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @func(i32)#0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll index 2000264941397..ff3e837235f05 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll @@ -106,13 +106,13 @@ define amdgpu_kernel void @truncate_high_elt_extract_vector(<2 x i16> addrspace( ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dword s2, s[4:5], 0x0 +; VI-NEXT: s_load_dword s3, s[6:7], 0x0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_load_dword s0, s[4:5], 0x0 -; VI-NEXT: s_load_dword s1, s[6:7], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_sext_i32_i16 s0, s0 -; VI-NEXT: s_sext_i32_i16 s1, s1 +; VI-NEXT: s_sext_i32_i16 s0, s2 +; VI-NEXT: s_sext_i32_i16 s1, s3 ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: v_mul_i32_i24_e32 v2, s1, v2 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 diff --git a/llvm/test/CodeGen/AMDGPU/trunc-store-i64.ll b/llvm/test/CodeGen/AMDGPU/trunc-store-i64.ll index 1c52aac1068f3..627ba9e0f7170 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc-store-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-store-i64.ll @@ -38,10 +38,10 @@ entry: } ; GCN-LABEL: {{^}}trunc_store_v16i64_v16i32: -; GCN: global_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:48 -; GCN: global_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:32 -; GCN: global_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:16 -; GCN: global_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off +; GCN: global_store_dwordx4 v{{\[[0-9]:[0-9]+\]}}, v{{\[[0-9]:[0-9]+\]}}, off offset:48 +; GCN: global_store_dwordx4 v{{\[[0-9]:[0-9]+\]}}, v{{\[[0-9]:[0-9]+\]}}, off offset:32 +; 
GCN: global_store_dwordx4 v{{\[[0-9]:[0-9]+\]}}, v{{\[[0-9]:[0-9]+\]}}, off offset:16 +; GCN: global_store_dwordx4 v{{\[[0-9]:[0-9]+\]}}, v{{\[[0-9]:[0-9]+\]}}, off define amdgpu_kernel void @trunc_store_v16i64_v16i32(< 16 x i32> addrspace(1)* %out, <16 x i64> %in) { entry: %trunc = trunc <16 x i64> %in to <16 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 7ebe78c16cb7d..f18d35f1fef01 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -1824,46 +1824,46 @@ define amdgpu_kernel void @s_test_udiv24_k_num_i64(i64 addrspace(1)* %out, i64 % define amdgpu_kernel void @s_test_udiv24_k_den_i64(i64 addrspace(1)* %out, i64 %x) { ; GCN-LABEL: s_test_udiv24_k_den_i64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s6, 0x46b6fe00 -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 -; GCN-NEXT: s_lshr_b32 s0, s7, 8 -; GCN-NEXT: v_cvt_f32_u32_e32 v0, s0 -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_mov_b32 s1, s5 +; GCN-NEXT: s_lshr_b32 s2, s3, 8 +; GCN-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s2, 0x46b6fe00 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v1 -; GCN-NEXT: v_mad_f32 v0, -v1, s6, v0 -; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s6 +; GCN-NEXT: v_mad_f32 v0, -v1, s2, v0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s2 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc ; GCN-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_udiv24_k_den_i64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: 
s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_mov_b32 s6, 0x46b6fe00 -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 -; GCN-IR-NEXT: s_lshr_b32 s0, s7, 8 -; GCN-IR-NEXT: v_cvt_f32_u32_e32 v0, s0 -; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_mov_b32 s1, s5 +; GCN-IR-NEXT: s_lshr_b32 s2, s3, 8 +; GCN-IR-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GCN-IR-NEXT: s_mov_b32 s2, 0x46b6fe00 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 ; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-IR-NEXT: v_cvt_u32_f32_e32 v2, v1 -; GCN-IR-NEXT: v_mad_f32 v0, -v1, s6, v0 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s6 +; GCN-IR-NEXT: v_mad_f32 v0, -v1, s2, v0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s2 ; GCN-IR-NEXT: v_mov_b32_e32 v1, 0 ; GCN-IR-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc ; GCN-IR-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %x.shr = lshr i64 %x, 40 %result = udiv i64 %x.shr, 23423 diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll index 10299b314e83e..559f1092e6e44 100644 --- a/llvm/test/CodeGen/AMDGPU/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll @@ -36,40 +36,41 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, [8 x i32], i32 ; ; GFX6-LABEL: test_udivrem: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0x26 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x13 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x1d -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s10, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3 -; 
GFX6-NEXT: s_sub_i32 s2, 0, s3 -; GFX6-NEXT: s_mov_b32 s11, s7 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, v0, s3 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 -; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_cndmask_b32_e64 v0, v1, v2, s[0:1] -; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; GFX6-NEXT: s_endpgm +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x26 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x13 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s10, s6 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX6-NEXT: s_sub_i32 s2, 0, s3 +; GFX6-NEXT: s_mov_b32 s11, s7 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 +; GFX6-NEXT: s_load_dword s2, s[0:1], 0x1d +; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; GFX6-NEXT: 
v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 +; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_cndmask_b32_e64 v0, v1, v2, s[0:1] +; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX6-NEXT: s_endpgm ; ; GFX8-LABEL: test_udivrem: ; GFX8: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index ef492be025068..cec4df0cd2958 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -1479,52 +1479,52 @@ define amdgpu_kernel void @s_test_urem24_k_num_i64(i64 addrspace(1)* %out, i64 % define amdgpu_kernel void @s_test_urem24_k_den_i64(i64 addrspace(1)* %out, i64 %x) { ; GCN-LABEL: s_test_urem24_k_den_i64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s1, 0x46b6fe00 -; GCN-NEXT: s_movk_i32 s0, 0x5b7f -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s4, 0x46b6fe00 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s6, s7, 8 -; GCN-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GCN-NEXT: s_lshr_b32 s2, s3, 8 +; GCN-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GCN-NEXT: s_movk_i32 s3, 0x5b7f +; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v1 -; GCN-NEXT: v_mad_f32 v0, -v1, s1, v0 -; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s1 -; GCN-NEXT: s_mov_b32 s1, s5 +; GCN-NEXT: v_mad_f32 v0, -v1, s4, v0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4 +; GCN-NEXT: s_mov_b32 s4, s0 ; GCN-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, 
vcc -; GCN-NEXT: v_mul_lo_u32 v0, v0, s0 -; GCN-NEXT: s_mov_b32 s0, s4 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s3 ; GCN-NEXT: v_mov_b32_e32 v1, 0 -; GCN-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_urem24_k_den_i64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b32 s1, 0x46b6fe00 -; GCN-IR-NEXT: s_movk_i32 s0, 0x5b7f -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 +; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s4, 0x46b6fe00 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_lshr_b32 s6, s7, 8 -; GCN-IR-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GCN-IR-NEXT: s_lshr_b32 s2, s3, 8 +; GCN-IR-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f +; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 ; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-IR-NEXT: v_cvt_u32_f32_e32 v2, v1 -; GCN-IR-NEXT: v_mad_f32 v0, -v1, s1, v0 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s1 -; GCN-IR-NEXT: s_mov_b32 s1, s5 +; GCN-IR-NEXT: v_mad_f32 v0, -v1, s4, v0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4 +; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc -; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s0 -; GCN-IR-NEXT: s_mov_b32 s0, s4 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3 ; GCN-IR-NEXT: v_mov_b32_e32 v1, 0 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %x.shr = lshr i64 %x, 40 %result = urem i64 %x.shr, 23423 
diff --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll index 4730d92b9f5bf..f4249f7dc8a9d 100644 --- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll +++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -80,12 +80,12 @@ define amdgpu_kernel void @madak_f16_use_2( ; SI-NEXT: s_mov_b32 s9, s11 ; SI-NEXT: s_mov_b32 s10, s2 ; SI-NEXT: s_mov_b32 s11, s3 -; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 ; SI-NEXT: s_mov_b32 s14, s2 ; SI-NEXT: s_mov_b32 s15, s3 -; SI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 -; SI-NEXT: v_mov_b32_e32 v2, 0x41200000 +; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 +; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; SI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 +; SI-NEXT: v_mov_b32_e32 v3, 0x41200000 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: s_mov_b32 s8, s6 @@ -95,11 +95,11 @@ define amdgpu_kernel void @madak_f16_use_2( ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-NEXT: v_madak_f32 v1, v0, v1, 0x41200000 -; SI-NEXT: v_mac_f32_e32 v2, v0, v3 +; SI-NEXT: v_mac_f32_e32 v3, v0, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v1 -; SI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v1, v3 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: buffer_store_short v1, off, s[8:11], 0 ; SI-NEXT: s_endpgm @@ -119,24 +119,22 @@ define amdgpu_kernel void @madak_f16_use_2( ; VI-NEXT: s_mov_b32 s9, s11 ; VI-NEXT: s_mov_b32 s10, s2 ; VI-NEXT: s_mov_b32 s11, s3 -; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 ; VI-NEXT: s_mov_b32 s14, s2 ; VI-NEXT: s_mov_b32 s15, s3 -; VI-NEXT: buffer_load_ushort v3, off, s[12:15], 0 -; VI-NEXT: v_mov_b32_e32 v2, 0x4900 +; VI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 +; VI-NEXT: buffer_load_ushort v1, off, 
s[8:11], 0 +; VI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 +; VI-NEXT: v_mov_b32_e32 v3, 0x4900 ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: s_mov_b32 s8, s6 +; VI-NEXT: s_mov_b32 s9, s7 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_madak_f16 v1, v0, v1, 0x4900 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_mac_f16_e32 v2, v0, v3 +; VI-NEXT: v_mac_f16_e32 v3, v0, v2 ; VI-NEXT: buffer_store_short v1, off, s[0:3], 0 -; VI-NEXT: buffer_store_short v2, off, s[4:7], 0 +; VI-NEXT: buffer_store_short v3, off, s[8:11], 0 ; VI-NEXT: s_endpgm half addrspace(1)* %r0, half addrspace(1)* %r1, diff --git a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll index d87e4990b2554..831bc8f796530 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll @@ -36,33 +36,33 @@ define amdgpu_kernel void @extract_insert_different_dynelt_v4i32(i32 addrspace(1 ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd -; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_mov_b32_e32 v5, 0 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b64 s[0:1], s[6:7] -; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v0 -; GCN-NEXT: v_lshlrev_b32_e32 v4, 2, v0 -; GCN-NEXT: v_mov_b32_e32 v5, v2 -; GCN-NEXT: buffer_load_dwordx4 v[0:3], v[1:2], s[0:3], 0 addr64 -; GCN-NEXT: v_mov_b32_e32 v6, s8 +; GCN-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GCN-NEXT: buffer_load_dwordx4 v[1:4], v[4:5], s[0:3], 0 addr64 +; GCN-NEXT: v_lshlrev_b32_e32 v6, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s8 ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s9, 3 +; GCN-NEXT: v_mov_b32_e32 v7, v5 ; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; 
GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s9, 2 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s9, 1 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s9, 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s10, 1 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s10, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s10, 3 +; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s10, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GCN-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64 +; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s10, 3 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GCN-NEXT: buffer_store_dword v0, v[6:7], s[4:7], 0 addr64 ; GCN-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %id.ext = sext i32 %id to i64 diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index bc77c295d4d56..fcb6619ae1eec 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -108,12 +108,12 @@ define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v2, v[2:3], off ; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4 -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_and_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX9-NEXT: v_and_b32_sdwa v0, 
v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0 @@ -128,11 +128,11 @@ define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_and_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_and_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v3 ; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll index 384cb1b4699d7..85859fb61eb4f 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -29,7 +29,7 @@ define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 { ; GCN-NEXT: s_and_saveexec_b32 s4, s4 ; GCN-NEXT: s_nop 0 ; GCN-NEXT: buffer_store_dword v0, v0, s[8:11], 0 offen -; GCN-NEXT: v_nop +; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GCN-NEXT: s_cbranch_execnz BB0_2 ; GCN-NEXT: ; %bb.3: ; in Loop: Header=BB0_1 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 4dd9efa9c0088..432b016a2b595 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ 
b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -59,7 +59,7 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: ;;#ASMEND ; GFX10: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[12:15] dmask:0x1 -; GFX10-NEXT: v_nop +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+4 diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir index 6ae620b8ad242..c2a329f9fa66a 100644 --- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir @@ -2,7 +2,7 @@ # GCN-LABEL: name: vmem_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr @@ -16,7 +16,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec # GCN: BUFFER_STORE_DWORD_OFFEN_exact -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_exec @@ -35,7 +35,7 @@ body: | # GCN-NEXT: S_MOV_B32 # GCN-NEXT: S_MOV_B32 # GCN-NEXT: S_MOV_B32 -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_chain @@ -54,7 +54,7 @@ body: | ... 
# GCN-LABEL: name: vmem_smem_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_LOAD_DWORD_IMM --- name: vmem_smem_write_sgpr @@ -69,7 +69,7 @@ body: | # GCN-LABEL: name: vmem_snop_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_NOP -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_snop_write_sgpr @@ -115,7 +115,7 @@ body: | # GCN-LABEL: name: vmem_swait_any_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_WAITCNT -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_swait_any_write_sgpr @@ -130,7 +130,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec_impread # GCN: BUFFER_LOAD_DWORD_OFFEN -# GCN: V_NOP +# GCN: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B64 --- name: vmem_write_exec_impread @@ -144,7 +144,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec_expread # GCN: BUFFER_LOAD_DWORD_OFFEN -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B64 --- name: vmem_write_exec_expread @@ -157,7 +157,7 @@ body: | ... # GCN-LABEL: name: ds_write_m0 # GCN: DS_READ_B32 -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: ds_write_m0 @@ -171,7 +171,7 @@ body: | ... 
# GCN-LABEL: name: vmem_write_sgpr_fall_through # GCN: BUFFER_LOAD_DWORD_OFFEN -# GCN: V_NOP +# GCN: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_fall_through @@ -189,7 +189,7 @@ body: | # GCN-LABEL: name: vmem_write_sgpr_branch # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_BRANCH -# GCN: V_NOP +# GCN: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_branch @@ -209,7 +209,7 @@ body: | # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_BRANCH # GCN: bb.2: -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_branch_around @@ -235,9 +235,9 @@ body: | # GCN-NEXT: S_BRANCH # GCN: bb.1: # GCN: S_WAITCNT -# GCN: V_ADD_I32 +# GCN: V_ADD_CO_U32 # GCN: bb.2: -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_cbranch_around @@ -254,7 +254,7 @@ body: | bb.1: successors: %bb.2 S_WAITCNT 0 - $vgpr2, $vcc_lo = V_ADD_I32_e64 $vgpr1, $vgpr1, 0, implicit $exec + $vgpr2, $vcc_lo = V_ADD_CO_U32_e64 $vgpr1, $vgpr1, 0, implicit $exec S_BRANCH %bb.2 bb.2: @@ -262,7 +262,7 @@ body: | ... # GCN-LABEL: name: vmem_write_sgpr_branch_backedge # GCN: $vgpr0 = IMPLICIT_DEF -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_branch_backedge @@ -280,7 +280,7 @@ body: | ... # GCN-LABEL: name: ds_write_exec # GCN: DS_WRITE_B32_gfx9 -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: ds_write_exec @@ -293,7 +293,7 @@ body: | ... # GCN-LABEL: name: vmem_scratch_exec # GCN: SCRATCH_LOAD_DWORD -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_scratch_exec @@ -305,7 +305,7 @@ body: | ... # GCN-LABEL: name: vmem_flat_exec # GCN: FLAT_LOAD_DWORD -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_flat_exec @@ -318,7 +318,7 @@ body: | ... 
# GCN-LABEL: name: vmem_global_exec # GCN: GLOBAL_LOAD_DWORD -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_global_exec @@ -331,7 +331,7 @@ body: | ... # GCN-LABEL: name: vmem_global_atomic_exec # GCN: GLOBAL_ATOMIC_ADD_RTN -# GCN-NEXT: V_NOP +# GCN-NEXT: S_WAITCNT_DEPCTR 65507 # GCN-NEXT: S_MOV_B32 --- name: vmem_global_atomic_exec diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir b/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir index 187d786449217..bac89399d0350 100644 --- a/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir @@ -35,7 +35,7 @@ # GCN-LABEL: name: fold_fi_vgpr{{$}} # GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN: %2:vgpr_32 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 %stack.0.alloca, %1, implicit-def $vcc, implicit $exec name: fold_fi_vgpr tracksRegLiveness: true registers: @@ -50,13 +50,13 @@ body: | bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... # GCN-LABEL: name: fold_vgpr_fi{{$}} # GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN: %2:vgpr_32 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 %stack.0.alloca, %1, implicit-def $vcc, implicit $exec name: fold_vgpr_fi tracksRegLiveness: true registers: @@ -71,14 +71,14 @@ body: | bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %1, %0, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %1, %0, 0, implicit $exec S_ENDPGM 0 ... 
# GCN-LABEL: name: fold_sgpr_fi{{$}} # GCN: %0:vgpr_32 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec # GCN: %1:sgpr_32 = IMPLICIT_DEF -# GCN: %2:vgpr_32 = V_ADD_I32_e32 %1, %0, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 %1, %0, implicit-def $vcc, implicit $exec name: fold_sgpr_fi tracksRegLiveness: true registers: @@ -93,14 +93,14 @@ body: | bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %1, %0, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %1, %0, 0, implicit $exec S_ENDPGM 0 ... # GCN-LABEL: name: fold_fi_sgpr{{$}} # GCN: %0:vgpr_32 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec # GCN: %1:sgpr_32 = IMPLICIT_DEF -# GCN: %2:vgpr_32 = V_ADD_I32_e32 %1, %0, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 %1, %0, implicit-def $vcc, implicit $exec name: fold_fi_sgpr tracksRegLiveness: true registers: @@ -115,13 +115,13 @@ body: | bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... # TODO: Should probably prefer folding immediate first # GCN-LABEL: name: fold_fi_imm{{$}} # GCN: %1:vgpr_32 = V_MOV_B32_e32 999, implicit $exec -# GCN: %2:vgpr_32 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 %stack.0.alloca, %1, implicit-def $vcc, implicit $exec name: fold_fi_imm tracksRegLiveness: true registers: @@ -136,13 +136,13 @@ body: | bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = V_MOV_B32_e32 999, implicit $exec - %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... 
# GCN-LABEL: name: fold_imm_fi{{$}} # GCN: %0:vgpr_32 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec -# GCN: %2:vgpr_32 = V_ADD_I32_e32 999, %0, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 999, %0, implicit-def $vcc, implicit $exec name: fold_imm_fi tracksRegLiveness: true registers: @@ -157,5 +157,5 @@ body: | bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = V_MOV_B32_e32 999, implicit $exec - %2, $vcc = V_ADD_I32_e64 %1, %0, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %1, %0, 0, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir b/llvm/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir index 5ad1b3b6ecf78..fe71266668d53 100644 --- a/llvm/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir +++ b/llvm/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir @@ -2,7 +2,7 @@ ... # GCN-LABEL: name: fold_imm_non_ssa{{$}} # GCN: %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec -# GCN: %2:vgpr_32 = V_ADD_I32_e32 456, %0, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 456, %0, implicit-def $vcc, implicit $exec name: fold_imm_non_ssa tracksRegLiveness: true @@ -16,13 +16,13 @@ body: | %0 = COPY undef %0 %0 = V_MOV_B32_e32 123, implicit $exec %1 = V_MOV_B32_e32 456, implicit $exec - %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... 
# GCN-LABEL: name: fold_partially_defined_superreg{{$}} # GCN: %1:vgpr_32 = V_MOV_B32_e32 456, implicit $exec -# GCN: %2:vgpr_32 = V_ADD_I32_e32 123, %1, implicit-def $vcc, implicit $exec +# GCN: %2:vgpr_32 = V_ADD_CO_U32_e32 123, %1, implicit-def $vcc, implicit $exec name: fold_partially_defined_superreg tracksRegLiveness: true registers: @@ -34,7 +34,7 @@ body: | bb.0: undef %3.sub0 = V_MOV_B32_e32 123, implicit $exec, implicit-def %3 %1 = V_MOV_B32_e32 456, implicit $exec - %2, $vcc = V_ADD_I32_e64 %3.sub0, %1, 0, implicit $exec + %2, $vcc = V_ADD_CO_U32_e64 %3.sub0, %1, 0, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 0b0b9a30f113f..896b974bc2baa 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -668,7 +668,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d ; GCN-LABEL: {{^}}test_loop_vcc: ; GFX1032: v_cmp_lt_f32_e32 vcc_lo, ; GFX1064: v_cmp_lt_f32_e32 vcc, -; GCN: s_cbranch_vccnz +; GCN: s_cbranch_vccz define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) #0 { entry: br label %loop @@ -836,8 +836,8 @@ define amdgpu_ps void @test_wqm_vote(float %a) { } ; GCN-LABEL: {{^}}test_branch_true: -; GFX1032: s_and_b32 vcc_lo, exec_lo, -1 -; GFX1064: s_and_b64 vcc, exec, -1 +; GFX1032: s_mov_b32 vcc_lo, exec_lo +; GFX1064: s_mov_b64 vcc, exec define amdgpu_kernel void @test_branch_true() #2 { entry: br i1 true, label %for.end, label %for.body.lr.ph @@ -1059,7 +1059,7 @@ declare void @external_void_func_void() #1 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: v_nop +; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]] @@ -1082,7 +1082,7 @@ declare void 
@external_void_func_void() #1 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: v_nop +; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll index a43c656b06071..bff7cf6809905 100644 --- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -6,17 +6,17 @@ define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) { ; SI-LABEL: widen_i16_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_addk_i32 s0, 0x3e7 -; SI-NEXT: s_or_b32 s0, s0, 4 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_addk_i32 s1, 0x3e7 +; SI-NEXT: s_or_b32 s4, s1, 4 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i16_constant_load: @@ -43,18 +43,18 @@ define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %a ; SI-LABEL: widen_i16_constant_load_zext_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: 
s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_and_b32 s0, s0, 0xffff -; SI-NEXT: s_addk_i32 s0, 0x3e7 -; SI-NEXT: s_or_b32 s0, s0, 4 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_and_b32 s1, s1, 0xffff +; SI-NEXT: s_addk_i32 s1, 0x3e7 +; SI-NEXT: s_or_b32 s4, s1, 4 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i16_constant_load_zext_i32: @@ -83,18 +83,18 @@ define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %a ; SI-LABEL: widen_i16_constant_load_sext_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_sext_i32_i16 s0, s0 -; SI-NEXT: s_addk_i32 s0, 0x3e7 -; SI-NEXT: s_or_b32 s0, s0, 4 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_sext_i32_i16 s1, s1 +; SI-NEXT: s_addk_i32 s1, 0x3e7 +; SI-NEXT: s_or_b32 s4, s1, 4 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i16_constant_load_sext_i32: @@ -122,13 +122,13 @@ define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %a define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) { ; SI-LABEL: widen_i17_constant_load: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 
s0, 0 -; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b32 s1, s0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s7, s[8:9], 0x0 +; SI-NEXT: s_load_dword s7, s[6:7], 0x0 ; SI-NEXT: s_mov_b32 s4, 2 ; SI-NEXT: s_mov_b32 s5, s0 ; SI-NEXT: s_mov_b32 s6, s2 @@ -206,23 +206,23 @@ define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) { define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) { ; SI-LABEL: widen_v2i8_constant_load: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_and_b32 s1, s0, 0xff00 -; SI-NEXT: s_add_i32 s0, s0, 12 -; SI-NEXT: s_or_b32 s0, s0, 4 -; SI-NEXT: s_and_b32 s0, s0, 0xff -; SI-NEXT: s_or_b32 s0, s1, s0 -; SI-NEXT: s_addk_i32 s0, 0x2c00 -; SI-NEXT: s_or_b32 s0, s0, 0x300 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_and_b32 s4, s1, 0xff00 +; SI-NEXT: s_add_i32 s1, s1, 12 +; SI-NEXT: s_or_b32 s1, s1, 4 +; SI-NEXT: s_and_b32 s1, s1, 0xff +; SI-NEXT: s_or_b32 s1, s4, s1 +; SI-NEXT: s_addk_i32 s1, 0x2c00 +; SI-NEXT: s_or_b32 s4, s1, 0x300 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_v2i8_constant_load: @@ -302,16 +302,16 @@ define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) { ; SI-LABEL: widen_i1_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; 
SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_and_b32 s0, s0, 1 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_and_b32 s4, s1, 1 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i1_constant_load: @@ -336,18 +336,18 @@ define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4) ; SI-LABEL: widen_i16_zextload_i64_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_and_b32 s0, s0, 0xffff -; SI-NEXT: s_addk_i32 s0, 0x3e7 -; SI-NEXT: s_or_b32 s0, s0, 4 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_and_b32 s1, s1, 0xffff +; SI-NEXT: s_addk_i32 s1, 0x3e7 +; SI-NEXT: s_or_b32 s4, s1, 4 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i16_zextload_i64_constant_load: @@ -376,19 +376,19 @@ define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* % ; SI-LABEL: widen_i1_zext_to_i64_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 
s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_and_b32 s0, s0, 1 -; SI-NEXT: s_add_u32 s0, s0, 0x3e7 -; SI-NEXT: s_addc_u32 s1, 0, 0 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_and_b32 s1, s1, 1 +; SI-NEXT: s_add_u32 s4, s1, 0x3e7 +; SI-NEXT: s_addc_u32 s5, 0, 0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i1_zext_to_i64_constant_load: @@ -455,17 +455,17 @@ define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %ar ; SI-LABEL: widen_i16_global_invariant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s5, s4 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s1, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_addk_i32 s0, 0x3e7 -; SI-NEXT: s_or_b32 s0, s0, 1 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_addk_i32 s1, 0x3e7 +; SI-NEXT: s_or_b32 s4, s1, 1 +; SI-NEXT: s_mov_b32 s1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: widen_i16_global_invariant_load: diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 167d8fa21ccb3..127d0bc0fc686 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -652,13 +652,11 @@ main_body: ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 ; CHECK-DAG: 
s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000 -; CHECK: ; %body +; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body ; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] -; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop +; CHECK: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] -; CHECK: s_cbranch_vccz - -; CHECK: s_cbranch_vccnz [[LOOPHDR]] +; CHECK: s_cbranch_vccz [[LOOPHDR]] ; CHECK: ; %break ; CHECK: ; return diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir index cb84da90b3863..010edf85cfade 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm.mir @@ -41,9 +41,9 @@ body: | %1 = COPY $sgpr1 %0 = COPY $sgpr0 S_CMP_LT_I32 0, %0, implicit-def $scc - %12 = V_ADD_I32_e32 %3, %3, implicit-def $vcc, implicit $exec + %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec %5 = S_CSELECT_B32 %2, %1, implicit $scc - %11 = V_ADD_I32_e32 %5, %12, implicit-def $vcc, implicit $exec + %11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec $vgpr0 = WWM %11, implicit $exec SI_RETURN_TO_EPILOG $vgpr0 diff --git a/llvm/test/CodeGen/ARM/cmp-bool.ll b/llvm/test/CodeGen/ARM/cmp-bool.ll new file mode 100644 index 0000000000000..18ef348b9edac --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmp-bool.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7a < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -mtriple=armv7m < %s | FileCheck %s --check-prefix=THUMB2 + +define void @bool_eq(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; ARM-LABEL: bool_eq: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: cmp r0, r1 +; ARM-NEXT: bxne lr +; ARM-NEXT: bx r2 +; +; THUMB-LABEL: bool_eq: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r7, lr} +; THUMB-NEXT: cmp r0, r1 +; THUMB-NEXT: bne .LBB0_2 +; THUMB-NEXT: @ %bb.1: @ %if.then +; THUMB-NEXT: blx r2 +; THUMB-NEXT: 
.LBB0_2: @ %if.end +; THUMB-NEXT: pop {r7, pc} +; +; THUMB2-LABEL: bool_eq: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: cmp r0, r1 +; THUMB2-NEXT: it ne +; THUMB2-NEXT: bxne lr +; THUMB2-NEXT: bx r2 +entry: + %0 = xor i1 %a, %b + br i1 %0, label %if.end, label %if.then + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} + +define void @bool_ne(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; ARM-LABEL: bool_ne: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: cmp r0, r1 +; ARM-NEXT: bxeq lr +; ARM-NEXT: bx r2 +; +; THUMB-LABEL: bool_ne: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r7, lr} +; THUMB-NEXT: cmp r0, r1 +; THUMB-NEXT: beq .LBB1_2 +; THUMB-NEXT: @ %bb.1: @ %if.then +; THUMB-NEXT: blx r2 +; THUMB-NEXT: .LBB1_2: @ %if.end +; THUMB-NEXT: pop {r7, pc} +; +; THUMB2-LABEL: bool_ne: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: cmp r0, r1 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: bxeq lr +; THUMB2-NEXT: bx r2 +entry: + %cmp = xor i1 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/ARM/dbg-tcreturn.ll b/llvm/test/CodeGen/ARM/dbg-tcreturn.ll index 37ec4e3d92ee4..d4061be981803 100644 --- a/llvm/test/CodeGen/ARM/dbg-tcreturn.ll +++ b/llvm/test/CodeGen/ARM/dbg-tcreturn.ll @@ -1,5 +1,4 @@ -; XFAIL: * -; RUN: llc %s -o - -stop-after=finalize-isel -verify-machineinstr | FileCheck %s +; RUN: llc %s -o - -stop-after=finalize-isel -verify-machineinstrs | FileCheck %s target datalayout = "e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" target triple = "thumbv7-apple-ios7.0.0" @@ -12,8 +11,8 @@ target triple = "thumbv7-apple-ios7.0.0" ; CHECK-NEXT: %0:gpr = COPY $r0 ; CHECK-NEXT: $r0 = COPY %0 ; CHECK-NEXT: $r1 = COPY %1 -; CHECK-NEXT: TCRETURNdi &__divsi3, implicit $sp, implicit $r0, implicit $r1 ; CHECK-NEXT: DBG_VALUE $noreg, $noreg, !13, !DIExpression(), debug-location !16 +; CHECK-NEXT: TCRETURNdi 
&__divsi3, implicit $sp, implicit $r0, implicit $r1 define i32 @test(i32 %a1, i32 %a2) !dbg !5 { entry: diff --git a/llvm/test/CodeGen/ARM/fcopysign.ll b/llvm/test/CodeGen/ARM/fcopysign.ll index d013fbf8c15ac..930ef1f2d20b4 100644 --- a/llvm/test/CodeGen/ARM/fcopysign.ll +++ b/llvm/test/CodeGen/ARM/fcopysign.ll @@ -1,40 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT ; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD ; rdar://8984306 define float @test1(float %x, float %y) nounwind { -entry: ; SOFT-LABEL: test1: -; SOFT: lsr r1, r1, #31 -; SOFT: bfi r0, r1, #31, #1 - +; SOFT: @ %bb.0: @ %entry +; SOFT-NEXT: lsr r1, r1, #31 +; SOFT-NEXT: bfi r0, r1, #31, #1 +; SOFT-NEXT: bx lr +; ; HARD-LABEL: test1: -; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000 -; HARD: vbsl [[REG1]], d +; HARD: @ %bb.0: @ %entry +; HARD-NEXT: vmov.f32 s2, s1 +; HARD-NEXT: @ kill: def $s0 killed $s0 def $d0 +; HARD-NEXT: vmov.i32 d16, #0x80000000 +; HARD-NEXT: vbit d0, d1, d16 +; HARD-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; HARD-NEXT: bx lr +entry: + %0 = tail call float @copysignf(float %x, float %y) nounwind readnone ret float %0 } define double @test2(double %x, double %y) nounwind { -entry: ; SOFT-LABEL: test2: -; SOFT: lsr r2, r3, #31 -; SOFT: bfi r1, r2, #31, #1 - +; SOFT: @ %bb.0: @ %entry +; SOFT-NEXT: lsr r2, r3, #31 +; SOFT-NEXT: bfi r1, r2, #31, #1 +; SOFT-NEXT: bx lr +; ; HARD-LABEL: test2: -; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000 -; HARD: vshl.i64 [[REG2]], [[REG2]], #32 -; HARD: vbsl [[REG2]], d1, d0 +; HARD: @ %bb.0: @ %entry +; HARD-NEXT: vmov.i32 d16, #0x80000000 +; HARD-NEXT: vshl.i64 d16, d16, #32 +; HARD-NEXT: vbit d0, d1, d16 +; HARD-NEXT: bx lr +entry: + %0 = tail call double @copysign(double %x, double %y) nounwind readnone ret double %0 
} define double @test3(double %x, double %y, double %z) nounwind { -entry: ; SOFT-LABEL: test3: -; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000 -; SOFT: vshl.i64 [[REG3]], [[REG3]], #32 -; SOFT: vbsl [[REG3]], +; SOFT: @ %bb.0: @ %entry +; SOFT-NEXT: vmov d16, r2, r3 +; SOFT-NEXT: vmov d17, r0, r1 +; SOFT-NEXT: vmul.f64 d16, d17, d16 +; SOFT-NEXT: vmov.i32 d17, #0x80000000 +; SOFT-NEXT: vshl.i64 d17, d17, #32 +; SOFT-NEXT: vldr d18, [sp] +; SOFT-NEXT: vbit d16, d18, d17 +; SOFT-NEXT: vmov r0, r1, d16 +; SOFT-NEXT: bx lr +; +; HARD-LABEL: test3: +; HARD: @ %bb.0: @ %entry +; HARD-NEXT: vmul.f64 d16, d0, d1 +; HARD-NEXT: vmov.i32 d17, #0x80000000 +; HARD-NEXT: vshl.i64 d17, d17, #32 +; HARD-NEXT: vorr d0, d17, d17 +; HARD-NEXT: vbsl d0, d2, d16 +; HARD-NEXT: bx lr +entry: %0 = fmul double %x, %y %1 = tail call double @copysign(double %0, double %z) nounwind readnone ret double %1 @@ -42,12 +72,34 @@ entry: ; rdar://9287902 define float @test4() nounwind { -entry: ; SOFT-LABEL: test4: -; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 -; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 -; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 -; SOFT: vbsl [[REG6]], [[REG7]], +; SOFT: @ %bb.0: @ %entry +; SOFT-NEXT: push {lr} +; SOFT-NEXT: bl _bar +; SOFT-NEXT: vmov d16, r0, r1 +; SOFT-NEXT: vcvt.f32.f64 s0, d16 +; SOFT-NEXT: vmov.i32 d17, #0x80000000 +; SOFT-NEXT: vshr.u64 d16, d16, #32 +; SOFT-NEXT: vmov.f32 d18, #5.000000e-01 +; SOFT-NEXT: vbif d16, d18, d17 +; SOFT-NEXT: vadd.f32 d0, d0, d16 +; SOFT-NEXT: vmov r0, s0 +; SOFT-NEXT: pop {lr} +; +; HARD-LABEL: test4: +; HARD: @ %bb.0: @ %entry +; HARD-NEXT: .save {r11, lr} +; HARD-NEXT: push {r11, lr} +; HARD-NEXT: bl bar +; HARD-NEXT: vmov d16, r0, r1 +; HARD-NEXT: vcvt.f32.f64 s0, d16 +; HARD-NEXT: vmov.i32 d17, #0x80000000 +; HARD-NEXT: vshr.u64 d16, d16, #32 +; HARD-NEXT: vmov.f32 s2, #5.000000e-01 +; HARD-NEXT: vbit d1, d16, d17 +; HARD-NEXT: vadd.f32 s0, s0, s2 +; HARD-NEXT: pop {r11, pc} +entry: %0 = tail call double (...) 
@bar() nounwind %1 = fptrunc double %0 to float %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 3cd07df671b99..65b8217ecfe57 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -424,7 +424,7 @@ declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 ; CHECK-FP16: vsqrt.f32 ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-VFP-LIBCALL: vsqrt.f32 +; CHECK-LIBCALL-VFP: vsqrt.f32 ; CHECK-NOVFP: bl sqrtf ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sqrt(half* %p) #0 { @@ -700,18 +700,44 @@ define void @test_maximum(half* %p) #0 { } ; CHECK-FP16-LABEL: test_copysign: -; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vbsl -; CHECK-FP16: vcvtb.f16.f32 +; CHECK-FP16: ldrh r2, [r0] +; CHECK-FP16-NEXT: vmov.i32 d16, #0x80000000 +; CHECK-FP16-NEXT: ldrh r1, [r1] +; CHECK-FP16-NEXT: vmov s0, r2 +; CHECK-FP16-NEXT: vmov s2, r1 +; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FP16-NEXT: vcvtb.f32.f16 s2, s2 +; CHECK-FP16-NEXT: vbit d0, d1, d16 +; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-FP16-NEXT: vmov r1, s0 +; CHECK-FP16-NEXT: strh r1, [r0] +; CHECK-FP16-NEXT: bx lr + ; CHECK-LIBCALL-LABEL: test_copysign: -; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-VFP-LIBCALL: vbsl +; CHECK-LIBCALL-VFP: .fnstart +; CHECK-LIBCALL-VFP-NEXT: .save {r4, r5, r11, lr} +; CHECK-LIBCALL-VFP-NEXT: push {r4, r5, r11, lr} +; CHECK-LIBCALL-VFP-NEXT: .vsave {d8, d9} +; CHECK-LIBCALL-VFP-NEXT: vpush {d8, d9} +; CHECK-LIBCALL-VFP-NEXT: mov r5, r0 +; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r0] +; CHECK-LIBCALL-VFP-NEXT: mov r4, r1 +; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: ldrh r1, [r4] +; CHECK-LIBCALL-VFP-NEXT: vmov s18, r0 +; CHECK-LIBCALL-VFP-NEXT: vmov.i32 d8, #0x80000000 +; CHECK-LIBCALL-VFP-NEXT: mov r0, r1 +; CHECK-LIBCALL: bl 
__aeabi_h2f +; CHECK-LIBCALL-VFP: vmov s0, r0 +; CHECK-LIBCALL-VFP-NEXT: vbif d0, d9, d8 +; CHECK-LIBCALL-VFP-NEXT: vmov r0, s0 +; CHECK-LIBCALL: bl __aeabi_f2h +; CHECK-LIBCALL-VFP: strh r0, [r5] +; CHECK-LIBCALL-VFP-NEXT: vpop {d8, d9} +; CHECK-LIBCALL-VFP-NEXT: pop {r4, r5, r11, pc} ; CHECK-NOVFP: and ; CHECK-NOVFP: bic ; CHECK-NOVFP: orr -; CHECK-LIBCALL: bl __aeabi_f2h define void @test_copysign(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -820,7 +846,7 @@ define void @test_round(half* %p) { ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-VFP-LIBCALL: vmla.f32 +; CHECK-LIBCALL-VFP: vmla.f32 ; CHECK-NOVFP: bl __aeabi_fmul ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fmuladd(half* %p, half* %q, half* %r) #0 { diff --git a/llvm/test/CodeGen/ARM/store_half.ll b/llvm/test/CodeGen/ARM/store_half.ll new file mode 100644 index 0000000000000..c182f9c3f7664 --- /dev/null +++ b/llvm/test/CodeGen/ARM/store_half.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=thumbebv8.2a-arm-none-eabi -mattr=+fullfp16 -filetype=obj -o /dev/null +; RUN: llc < %s -mtriple=thumbv8.2a-arm-none-eabi -mattr=+fullfp16 -filetype=obj -o /dev/null +; RUN: llc < %s -mtriple=armebv8.2a-arm-none-eabi -mattr=+fullfp16 -filetype=obj -o /dev/null +; RUN: llc < %s -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 -filetype=obj -o /dev/null + +define void @woah(half* %waythere) { + store half 0xHE110, half* %waythere + ret void +} diff --git a/llvm/test/CodeGen/ARM/vbsl-constant.ll b/llvm/test/CodeGen/ARM/vbsl-constant.ll index 6bcbbc8fa878d..392bea1f19335 100644 --- a/llvm/test/CodeGen/ARM/vbsl-constant.ll +++ b/llvm/test/CodeGen/ARM/vbsl-constant.ll @@ -1,10 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { 
-;CHECK-LABEL: v_bsli8: -;CHECK: vldr -;CHECK: vldr -;CHECK: vbsl +; CHECK-LABEL: v_bsli8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 d16, #0x3 +; CHECK-NEXT: vldr d17, [r2] +; CHECK-NEXT: vldr d18, [r0] +; CHECK-NEXT: vbsl d16, d18, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = load <8 x i8>, <8 x i8>* %C @@ -15,10 +20,14 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { } define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { -;CHECK-LABEL: v_bsli16: -;CHECK: vldr -;CHECK: vldr -;CHECK: vbsl +; CHECK-LABEL: v_bsli16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 d16, #0x3 +; CHECK-NEXT: vldr d17, [r2] +; CHECK-NEXT: vldr d18, [r0] +; CHECK-NEXT: vbsl d16, d18, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = load <4 x i16>, <4 x i16>* %C @@ -29,10 +38,14 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind } define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { -;CHECK-LABEL: v_bsli32: -;CHECK: vldr -;CHECK: vldr -;CHECK: vbsl +; CHECK-LABEL: v_bsli32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d16, #0x3 +; CHECK-NEXT: vldr d17, [r2] +; CHECK-NEXT: vldr d18, [r0] +; CHECK-NEXT: vbsl d16, d18, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = load <2 x i32>, <2 x i32>* %C @@ -43,11 +56,14 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind } define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { -;CHECK-LABEL: v_bsli64: -;CHECK: vldr -;CHECK: vldr -;CHECK: vldr -;CHECK: vbsl +; CHECK-LABEL: v_bsli64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d17, [r2] +; CHECK-NEXT: vldr d16, LCPI3_0 +; CHECK-NEXT: vldr d18, 
[r0] +; CHECK-NEXT: vbsl d16, d18, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = load <1 x i64>, <1 x i64>* %C @@ -58,10 +74,15 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind } define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { -;CHECK-LABEL: v_bslQi8: -;CHECK: vld1.32 -;CHECK: vld1.32 -;CHECK: vbsl +; CHECK-LABEL: v_bslQi8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.32 {d16, d17}, [r2] +; CHECK-NEXT: vmov.i8 q9, #0x3 +; CHECK-NEXT: vld1.32 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = load <16 x i8>, <16 x i8>* %C @@ -72,10 +93,15 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind } define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { -;CHECK-LABEL: v_bslQi16: -;CHECK: vld1.32 -;CHECK: vld1.32 -;CHECK: vbsl +; CHECK-LABEL: v_bslQi16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.32 {d16, d17}, [r2] +; CHECK-NEXT: vmov.i16 q9, #0x3 +; CHECK-NEXT: vld1.32 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <8 x i16>, <8 x i16>* %C @@ -86,10 +112,15 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin } define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { -;CHECK-LABEL: v_bslQi32: -;CHECK: vld1.32 -;CHECK: vld1.32 -;CHECK: vbsl +; CHECK-LABEL: v_bslQi32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.32 {d16, d17}, [r2] +; CHECK-NEXT: vmov.i32 q9, #0x3 +; CHECK-NEXT: vld1.32 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, 
d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -100,11 +131,16 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin } define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { -;CHECK-LABEL: v_bslQi64: -;CHECK: vld1.32 -;CHECK: vld1.32 -;CHECK: vld1.64 -;CHECK: vbsl +; CHECK-LABEL: v_bslQi64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.32 {d16, d17}, [r2] +; CHECK-NEXT: vld1.32 {d18, d19}, [r0] +; CHECK-NEXT: adr r0, LCPI7_0 +; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128] +; CHECK-NEXT: vbit q8, q9, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll index 6812dd90a1004..b43c709c99848 100644 --- a/llvm/test/CodeGen/ARM/vbsl.ll +++ b/llvm/test/CodeGen/ARM/vbsl.ll @@ -1,10 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s ; rdar://12471808 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { -;CHECK-LABEL: v_bsli8: -;CHECK: vbsl +; CHECK-LABEL: v_bsli8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d18, [r0] +; CHECK-NEXT: vldr d16, [r2] +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = load <8 x i8>, <8 x i8>* %C @@ -16,8 +23,14 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { } define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { -;CHECK-LABEL: v_bsli16: -;CHECK: vbsl +; CHECK-LABEL: v_bsli16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr 
d18, [r0] +; CHECK-NEXT: vldr d16, [r2] +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = load <4 x i16>, <4 x i16>* %C @@ -29,8 +42,14 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind } define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { -;CHECK-LABEL: v_bsli32: -;CHECK: vbsl +; CHECK-LABEL: v_bsli32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d18, [r0] +; CHECK-NEXT: vldr d16, [r2] +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = load <2 x i32>, <2 x i32>* %C @@ -42,8 +61,14 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind } define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { -;CHECK-LABEL: v_bsli64: -;CHECK: vbsl +; CHECK-LABEL: v_bsli64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d18, [r0] +; CHECK-NEXT: vldr d16, [r2] +; CHECK-NEXT: vldr d17, [r1] +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = load <1 x i64>, <1 x i64>* %C @@ -55,8 +80,15 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind } define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { -;CHECK-LABEL: v_bslQi8: -;CHECK: vbsl +; CHECK-LABEL: v_bslQi8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vbit q8, q9, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 
= load <16 x i8>, <16 x i8>* %C @@ -68,8 +100,15 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind } define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { -;CHECK-LABEL: v_bslQi16: -;CHECK: vbsl +; CHECK-LABEL: v_bslQi16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vbit q8, q9, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <8 x i16>, <8 x i16>* %C @@ -81,8 +120,15 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin } define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { -;CHECK-LABEL: v_bslQi32: -;CHECK: vbsl +; CHECK-LABEL: v_bslQi32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vbit q8, q9, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -94,8 +140,15 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin } define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { -;CHECK-LABEL: v_bslQi64: -;CHECK: vbsl +; CHECK-LABEL: v_bslQi64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vld1.64 {d16, d17}, [r2] +; CHECK-NEXT: vld1.64 {d18, d19}, [r1] +; CHECK-NEXT: vbit q8, q9, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C @@ -108,84 +161,180 @@ define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, 
<2 x i64>* %C) nounwin define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: f1: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind ret <8 x i8> %vbsl.i } define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: f2: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind ret <4 x i16> %vbsl3.i } define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: f3: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind ret <2 x i32> %vbsl3.i } define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: f4: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind ret <2 x float> %vbsl4.i } define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp { ; 
CHECK-LABEL: g1: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #16 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind ret <16 x i8> %vbsl.i } define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: g2: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #16 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind ret <8 x i16> %vbsl3.i } define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: g3: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #16 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind ret <4 x i32> %vbsl3.i } define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: g4: -; CHECK: vbsl +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #16 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: mov r0, sp +; 
CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind ret <4 x float> %vbsl4.i } define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: test_vbsl_s64: -; CHECK: vbsl d +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind ret <1 x i64> %vbsl3.i } define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: test_vbsl_u64: -; CHECK: vbsl d +; CHECK: @ %bb.0: +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbit d16, d17, d18 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind ret <1 x i64> %vbsl3.i } define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: test_vbslq_s64: -; CHECK: vbsl q +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #16 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind ret <2 x i64> %vbsl3.i } define <2 x i64> @test_vbslq_u64(<2 x i64> 
%a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: test_vbslq_u64: -; CHECK: vbsl q +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #16 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vld1.64 {d20, d21}, [r0] +; CHECK-NEXT: vbit q8, q10, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind ret <2 x i64> %vbsl3.i } diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll index e212b37fa1f5f..03e212c75567f 100644 --- a/llvm/test/CodeGen/ARM/vselect_imax.ll +++ b/llvm/test/CodeGen/ARM/vselect_imax.ll @@ -63,11 +63,66 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, ; lowering we also need to adjust the cost. %T0_18 = type <4 x i64> %T1_18 = type <4 x i1> -; CHECK-LABEL: func_blend18: define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, %T1_18* %blend, %T0_18* %storeaddr) { -; CHECK: vbsl -; CHECK: vbsl +; CHECK-LABEL: func_blend18: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]! +; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]! 
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128] +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128] +; CHECK-NEXT: vmov.32 r12, d18[0] +; CHECK-NEXT: vmov.32 r2, d20[0] +; CHECK-NEXT: vmov.32 lr, d18[1] +; CHECK-NEXT: vmov.32 r0, d20[1] +; CHECK-NEXT: vmov.32 r7, d16[0] +; CHECK-NEXT: vmov.32 r5, d22[0] +; CHECK-NEXT: vmov.32 r4, d22[1] +; CHECK-NEXT: vmov.32 r6, d19[0] +; CHECK-NEXT: subs r2, r2, r12 +; CHECK-NEXT: vmov.32 r2, d16[1] +; CHECK-NEXT: sbcs r0, r0, lr +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: subs r7, r5, r7 +; CHECK-NEXT: vmov.32 r7, d21[0] +; CHECK-NEXT: vmov.32 r5, d19[1] +; CHECK-NEXT: sbcs r2, r4, r2 +; CHECK-NEXT: vmov.32 r4, d21[1] +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: subs r7, r7, r6 +; CHECK-NEXT: vmov.32 r6, d23[0] +; CHECK-NEXT: vmov.32 r7, d17[0] +; CHECK-NEXT: sbcs r5, r4, r5 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: vmov.32 r5, d17[1] +; CHECK-NEXT: subs r7, r6, r7 +; CHECK-NEXT: vmov.32 r7, d23[1] +; CHECK-NEXT: sbcs r7, r7, r5 +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvnne r1, #0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vdup.32 d25, r1 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d24, r2 +; CHECK-NEXT: vdup.32 d27, r4 +; CHECK-NEXT: vbit q8, q11, q12 +; CHECK-NEXT: vdup.32 d26, r0 +; CHECK-NEXT: vbit q9, q10, q13 +; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]! 
+; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128] +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov pc, lr %v0 = load %T0_18, %T0_18* %loadaddr %v1 = load %T0_18, %T0_18* %loadaddr2 %c = icmp slt %T0_18 %v0, %v1 @@ -79,13 +134,124 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, } %T0_19 = type <8 x i64> %T1_19 = type <8 x i1> -; CHECK-LABEL: func_blend19: define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, %T1_19* %blend, %T0_19* %storeaddr) { -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl +; CHECK-LABEL: func_blend19: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: add r2, r1, #48 +; CHECK-NEXT: add r5, r1, #32 +; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128] +; CHECK-NEXT: add r2, r0, #48 +; CHECK-NEXT: add r6, r0, #32 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128] +; CHECK-NEXT: vmov.32 r12, d16[0] +; CHECK-NEXT: vmov.32 r2, d18[0] +; CHECK-NEXT: vmov.32 lr, d16[1] +; CHECK-NEXT: vmov.32 r4, d18[1] +; CHECK-NEXT: vld1.64 {d28, d29}, [r0:128]! +; CHECK-NEXT: vld1.64 {d26, d27}, [r5:128] +; CHECK-NEXT: vld1.64 {d30, d31}, [r6:128] +; CHECK-NEXT: vmov.32 r5, d17[0] +; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128] +; CHECK-NEXT: vmov.32 r0, d17[1] +; CHECK-NEXT: vld1.64 {d24, d25}, [r1:128]! 
+; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128] +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: subs r2, r2, r12 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: vmov.32 r2, d19[0] +; CHECK-NEXT: sbcs r6, r4, lr +; CHECK-NEXT: vmov.32 r4, d24[0] +; CHECK-NEXT: vmov.32 r6, d19[1] +; CHECK-NEXT: movlt r12, #1 +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: mvnne r12, #0 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: vmov.32 r5, d28[0] +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: sbcs r0, r6, r0 +; CHECK-NEXT: vmov.32 r6, d28[1] +; CHECK-NEXT: vmov.32 r0, d24[1] +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d7, r2 +; CHECK-NEXT: vdup.32 d6, r12 +; CHECK-NEXT: subs r5, r5, r4 +; CHECK-NEXT: vmov.32 r4, d25[1] +; CHECK-NEXT: vmov.32 r5, d25[0] +; CHECK-NEXT: sbcs r0, r6, r0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: vmov.32 r0, d29[0] +; CHECK-NEXT: movlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mvnne r6, #0 +; CHECK-NEXT: subs r0, r0, r5 +; CHECK-NEXT: vmov.32 r5, d21[0] +; CHECK-NEXT: vmov.32 r0, d29[1] +; CHECK-NEXT: sbcs r0, r0, r4 +; CHECK-NEXT: vmov.32 r4, d23[0] +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: vdup.32 d1, r0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vdup.32 d0, r6 +; CHECK-NEXT: vmov.32 r6, d22[0] +; CHECK-NEXT: vbit q12, q14, q0 +; CHECK-NEXT: subs r5, r4, r5 +; CHECK-NEXT: vmov.32 r4, d23[1] +; CHECK-NEXT: vmov.32 r5, d21[1] +; CHECK-NEXT: sbcs r5, r4, r5 +; CHECK-NEXT: vmov.32 r4, d20[1] +; CHECK-NEXT: vmov.32 r5, d20[0] +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: vdup.32 d5, r0 +; CHECK-NEXT: add r0, r3, #32 +; CHECK-NEXT: subs r6, r6, r5 +; CHECK-NEXT: vmov.32 r5, d26[0] +; CHECK-NEXT: vmov.32 r6, d22[1] +; CHECK-NEXT: sbcs r6, r6, r4 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: vmov.32 r6, d30[0] +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: subs r6, r6, r5 +; CHECK-NEXT: vmov.32 r5, 
d30[1] +; CHECK-NEXT: vmov.32 r6, d26[1] +; CHECK-NEXT: sbcs r6, r5, r6 +; CHECK-NEXT: vmov.32 r5, d31[0] +; CHECK-NEXT: vmov.32 r6, d27[0] +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: subs r6, r5, r6 +; CHECK-NEXT: vmov.32 r5, d31[1] +; CHECK-NEXT: vmov.32 r6, d27[1] +; CHECK-NEXT: sbcs r6, r5, r6 +; CHECK-NEXT: movlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mvnne r7, #0 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvnne r1, #0 +; CHECK-NEXT: vdup.32 d3, r7 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vdup.32 d2, r1 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vbit q13, q15, q1 +; CHECK-NEXT: vdup.32 d4, r4 +; CHECK-NEXT: vbit q10, q11, q2 +; CHECK-NEXT: vbit q8, q9, q3 +; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128] +; CHECK-NEXT: add r0, r3, #48 +; CHECK-NEXT: vst1.64 {d24, d25}, [r3:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128] +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov pc, lr %v0 = load %T0_19, %T0_19* %loadaddr %v1 = load %T0_19, %T0_19* %loadaddr2 %c = icmp slt %T0_19 %v0, %v1 @@ -97,17 +263,250 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, } %T0_20 = type <16 x i64> %T1_20 = type <16 x i1> -; CHECK-LABEL: func_blend20: define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2, %T1_20* %blend, %T0_20* %storeaddr) { -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl -; CHECK: vbsl +; CHECK-LABEL: func_blend20: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]! +; CHECK-NEXT: add r10, r0, #64 +; CHECK-NEXT: vld1.64 {d18, d19}, [r9:128]! 
+; CHECK-NEXT: vmov.32 r2, d16[0] +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vmov.32 r6, d18[0] +; CHECK-NEXT: vmov.32 r4, d16[1] +; CHECK-NEXT: vmov.32 r7, d18[1] +; CHECK-NEXT: vmov.32 r5, d17[0] +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: vmov.32 r2, d19[0] +; CHECK-NEXT: sbcs r7, r7, r4 +; CHECK-NEXT: movlt r6, #1 +; CHECK-NEXT: vmov.32 r7, d17[1] +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: vmov.32 r2, d19[1] +; CHECK-NEXT: sbcs r2, r2, r7 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: vdup.32 d21, r2 +; CHECK-NEXT: mvnne r6, #0 +; CHECK-NEXT: vdup.32 d20, r6 +; CHECK-NEXT: mov r2, #32 +; CHECK-NEXT: add r6, r1, #64 +; CHECK-NEXT: vld1.64 {d24, d25}, [r10:128], r2 +; CHECK-NEXT: vbit q8, q9, q10 +; CHECK-NEXT: vld1.64 {d28, d29}, [r6:128], r2 +; CHECK-NEXT: vmov.32 r4, d29[0] +; CHECK-NEXT: vmov.32 r5, d25[0] +; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128] +; CHECK-NEXT: vld1.64 {d2, d3}, [r8:128] +; CHECK-NEXT: vld1.64 {d22, d23}, [r6:128]! +; CHECK-NEXT: vld1.64 {d20, d21}, [r6:128] +; CHECK-NEXT: vmov.32 r6, d0[0] +; CHECK-NEXT: vld1.64 {d18, d19}, [r10:128]! 
+; CHECK-NEXT: vmov.32 r9, d23[0] +; CHECK-NEXT: vmov.32 r11, d19[0] +; CHECK-NEXT: vmov.32 r8, d23[1] +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: vmov.32 r5, d25[1] +; CHECK-NEXT: vmov.32 r4, d29[1] +; CHECK-NEXT: sbcs r4, r5, r4 +; CHECK-NEXT: vmov.32 r5, d24[0] +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d5, r4 +; CHECK-NEXT: vmov.32 r4, d28[0] +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: vmov.32 r5, d24[1] +; CHECK-NEXT: vmov.32 r4, d28[1] +; CHECK-NEXT: sbcs r4, r5, r4 +; CHECK-NEXT: vmov.32 r5, d1[0] +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d4, r4 +; CHECK-NEXT: vmov.32 r4, d3[0] +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: vmov.32 r5, d1[1] +; CHECK-NEXT: vmov.32 r4, d3[1] +; CHECK-NEXT: sbcs r4, r5, r4 +; CHECK-NEXT: add r5, r1, #32 +; CHECK-NEXT: vld1.64 {d26, d27}, [r5:128] +; CHECK-NEXT: add r5, r1, #48 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: add r1, r1, #80 +; CHECK-NEXT: vld1.64 {d30, d31}, [r5:128] +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vbif q12, q14, q2 +; CHECK-NEXT: vmov.32 r5, d2[0] +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d29, r4 +; CHECK-NEXT: vmov.32 r4, d31[1] +; CHECK-NEXT: subs r5, r6, r5 +; CHECK-NEXT: vmov.32 r6, d0[1] +; CHECK-NEXT: vmov.32 r5, d2[1] +; CHECK-NEXT: sbcs r5, r6, r5 +; CHECK-NEXT: add r6, r0, #48 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: vld1.64 {d6, d7}, [r6:128] +; CHECK-NEXT: movlt r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: mvnne r5, #0 +; CHECK-NEXT: vmov.32 r7, d7[0] +; CHECK-NEXT: vdup.32 d28, r5 +; CHECK-NEXT: vmov.32 r5, d31[0] +; CHECK-NEXT: vbsl q14, q0, q1 +; CHECK-NEXT: vmov.32 r6, d7[1] +; CHECK-NEXT: vmov.32 r2, d6[0] +; CHECK-NEXT: subs r5, r7, r5 +; CHECK-NEXT: vmov.32 r7, d6[1] +; CHECK-NEXT: sbcs r4, r6, r4 +; CHECK-NEXT: vmov.32 r6, d30[0] +; CHECK-NEXT: vmov.32 r5, d30[1] +; 
CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d3, r4 +; CHECK-NEXT: vmov.32 r4, d26[1] +; CHECK-NEXT: subs r2, r2, r6 +; CHECK-NEXT: sbcs r2, r7, r5 +; CHECK-NEXT: add r5, r0, #32 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vld1.64 {d0, d1}, [r5:128] +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vmov.32 r6, d0[0] +; CHECK-NEXT: vdup.32 d2, r2 +; CHECK-NEXT: add r0, r0, #80 +; CHECK-NEXT: vmov.32 r2, d26[0] +; CHECK-NEXT: vbit q15, q3, q1 +; CHECK-NEXT: vmov.32 r5, d0[1] +; CHECK-NEXT: vmov.32 r7, d1[0] +; CHECK-NEXT: vld1.64 {d2, d3}, [r10:128] +; CHECK-NEXT: vld1.64 {d6, d7}, [r1:128] +; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128] +; CHECK-NEXT: vmov.32 r1, d7[1] +; CHECK-NEXT: vmov.32 r10, d19[1] +; CHECK-NEXT: vmov.32 lr, d6[0] +; CHECK-NEXT: vmov.32 r3, d8[0] +; CHECK-NEXT: vmov.32 r12, d8[1] +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: vmov.32 r6, d1[1] +; CHECK-NEXT: sbcs r2, r5, r4 +; CHECK-NEXT: vmov.32 r5, d27[0] +; CHECK-NEXT: vmov.32 r4, d27[1] +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: subs r5, r7, r5 +; CHECK-NEXT: vmov.32 r7, d7[0] +; CHECK-NEXT: sbcs r4, r6, r4 +; CHECK-NEXT: vmov.32 r6, d2[0] +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: vmov.32 r5, d2[1] +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d5, r4 +; CHECK-NEXT: vdup.32 d4, r2 +; CHECK-NEXT: vmov.32 r2, d20[0] +; CHECK-NEXT: vbit q13, q0, q2 +; CHECK-NEXT: vmov.32 r4, d20[1] +; CHECK-NEXT: subs r0, r6, r2 +; CHECK-NEXT: vmov.32 r2, d9[1] +; CHECK-NEXT: sbcs r0, r5, r4 +; CHECK-NEXT: vmov.32 r4, d9[0] +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmov.32 r6, d18[0] +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: vmov.32 r5, d18[1] +; CHECK-NEXT: subs r4, r4, r7 +; CHECK-NEXT: vmov.32 r7, 
d21[1] +; CHECK-NEXT: sbcs r1, r2, r1 +; CHECK-NEXT: vmov.32 r4, d22[1] +; CHECK-NEXT: vmov.32 r1, d22[0] +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d11, r2 +; CHECK-NEXT: vmov.32 r2, d3[1] +; CHECK-NEXT: subs r1, r6, r1 +; CHECK-NEXT: vmov.32 r6, d21[0] +; CHECK-NEXT: sbcs r1, r5, r4 +; CHECK-NEXT: vmov.32 r4, d3[0] +; CHECK-NEXT: vmov.32 r5, d6[1] +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvnne r1, #0 +; CHECK-NEXT: subs r4, r4, r6 +; CHECK-NEXT: sbcs r2, r2, r7 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: subs r4, r11, r9 +; CHECK-NEXT: sbcs r4, r10, r8 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: subs r3, r3, lr +; CHECK-NEXT: sbcs r3, r12, r5 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mvnne r3, #0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d10, r3 +; CHECK-NEXT: vdup.32 d1, r4 +; CHECK-NEXT: vorr q2, q5, q5 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vdup.32 d0, r1 +; CHECK-NEXT: vbsl q2, q4, q3 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vbif q9, q11, q0 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vdup.32 d7, r2 +; CHECK-NEXT: vdup.32 d6, r0 +; CHECK-NEXT: add r0, r1, #80 +; CHECK-NEXT: vbit q10, q1, q3 +; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128] +; CHECK-NEXT: add r0, r1, #32 +; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128] +; CHECK-NEXT: add r0, r1, #48 +; CHECK-NEXT: vst1.64 {d30, d31}, [r0:128] +; CHECK-NEXT: add r0, r1, #64 +; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]! +; CHECK-NEXT: vst1.64 {d28, d29}, [r1:128] +; CHECK-NEXT: mov r1, #32 +; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128], r1 +; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]! 
+; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128] +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: add sp, sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: mov pc, lr %v0 = load %T0_20, %T0_20* %loadaddr %v1 = load %T0_20, %T0_20* %loadaddr2 %c = icmp slt %T0_20 %v0, %v1 diff --git a/llvm/test/CodeGen/BPF/BTF/local-var-readonly-1.ll b/llvm/test/CodeGen/BPF/BTF/local-var-readonly-1.ll new file mode 100644 index 0000000000000..3da7e64c22002 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/local-var-readonly-1.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source: +; void foo(const void *); +; int test() { +; const char *str = "abcd"; +; const struct { +; unsigned a[4]; +; } val = { .a = {2, 3, 4, 5} }; +; foo(str); +; foo(&val); +; return 0; +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +%struct.anon = type { [4 x i32] } + +@.str = private unnamed_addr constant [5 x i8] c"abcd\00", align 1 +@__const.test.val = private unnamed_addr constant %struct.anon { [4 x i32] [i32 2, i32 3, i32 4, i32 5] }, align 4 + +; Function Attrs: nounwind +define dso_local i32 @test() local_unnamed_addr #0 !dbg !7 { +entry: + %val = alloca %struct.anon, align 4 + call void @llvm.dbg.value(metadata i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), metadata !12, metadata !DIExpression()), !dbg !25 + %0 = bitcast %struct.anon* %val to i8*, !dbg !26 + call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #4, !dbg !26 + call void @llvm.dbg.declare(metadata %struct.anon* %val, metadata !16, metadata !DIExpression()), !dbg !27 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %0, i8* nonnull align 4 dereferenceable(16) bitcast (%struct.anon* @__const.test.val to i8*), i64 16, i1 false), !dbg !27 + tail call void 
@foo(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0)) #4, !dbg !28 + call void @foo(i8* nonnull %0) #4, !dbg !29 + call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) #4, !dbg !30 + ret i32 0, !dbg !31 +} + +; the initial value of "str" is stored in section .rodata.str1.1 +; the initial value of "val" is stored in section .rodata.cst16 +; CHECK-NOT: BTF_KIND_DATASEC + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +declare !dbg !32 dso_local void @foo(i8*) local_unnamed_addr #3 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 1e92cffe18a07c12042b57504dfa7fb709b833c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "t.c", directory: "/tmp/home/yhs/tmp") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 1e92cffe18a07c12042b57504dfa7fb709b833c8)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !16} +!12 = !DILocalVariable(name: "str", scope: !7, file: !1, line: 3, type: !13) +!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) +!14 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !15) +!15 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!16 = !DILocalVariable(name: "val", scope: !7, file: !1, line: 6, type: !17) +!17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !18) +!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !7, file: !1, line: 4, size: 128, elements: !19) +!19 = !{!20} +!20 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !18, file: !1, line: 5, baseType: !21, size: 128) +!21 = !DICompositeType(tag: DW_TAG_array_type, baseType: !22, size: 128, elements: !23) +!22 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!23 = !{!24} +!24 = !DISubrange(count: 4) +!25 = !DILocation(line: 
0, scope: !7) +!26 = !DILocation(line: 4, column: 3, scope: !7) +!27 = !DILocation(line: 6, column: 5, scope: !7) +!28 = !DILocation(line: 7, column: 3, scope: !7) +!29 = !DILocation(line: 8, column: 3, scope: !7) +!30 = !DILocation(line: 10, column: 1, scope: !7) +!31 = !DILocation(line: 9, column: 3, scope: !7) +!32 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !33, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) +!33 = !DISubroutineType(types: !34) +!34 = !{null, !35} +!35 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !36, size: 64) +!36 = !DIDerivedType(tag: DW_TAG_const_type, baseType: null) diff --git a/llvm/test/CodeGen/BPF/BTF/local-var-readonly-2.ll b/llvm/test/CodeGen/BPF/BTF/local-var-readonly-2.ll new file mode 100644 index 0000000000000..772b566698f40 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/local-var-readonly-2.ll @@ -0,0 +1,97 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source: +; void foo(const void *); +; int test() { +; const struct { +; unsigned a[4]; +; char b; +; } val = { .a = {2, 3, 4, 5}, .b = 4 }; +; foo(&val); +; return 0; +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +%struct.anon = type { [4 x i32], i8 } + +@__const.test.val = private unnamed_addr constant %struct.anon { [4 x i32] [i32 2, i32 3, i32 4, i32 5], i8 4 }, align 4 + +; Function Attrs: nounwind +define dso_local i32 @test() local_unnamed_addr #0 !dbg !7 { +entry: + %val = alloca %struct.anon, align 4 + %0 = bitcast %struct.anon* %val to i8*, !dbg !23 + call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %0) #4, !dbg !23 + call void @llvm.dbg.declare(metadata %struct.anon* %val, metadata !12, metadata !DIExpression()), !dbg !24 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(20) %0, i8* nonnull align 4 dereferenceable(20) bitcast 
(%struct.anon* @__const.test.val to i8*), i64 20, i1 false), !dbg !24 + call void @foo(i8* nonnull %0) #4, !dbg !25 + call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %0) #4, !dbg !26 + ret i32 0, !dbg !27 +} + +; the init value of local variable "val" is stored in .rodata section +; CHECK: .long 42 # BTF_KIND_DATASEC +; CHECK-NEXT: .long 251658240 # 0xf000000 +; CHECK-NEXT: .long 0 + +; CHECK: .ascii ".rodata" # string offset=42 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +declare !dbg !28 dso_local void @foo(i8*) local_unnamed_addr #3 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = 
!{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 1e92cffe18a07c12042b57504dfa7fb709b833c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "t.c", directory: "/tmp/home/yhs/tmp") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 1e92cffe18a07c12042b57504dfa7fb709b833c8)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{!10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12} +!12 = !DILocalVariable(name: "val", scope: !7, file: !1, line: 6, type: !13) +!13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !7, file: !1, line: 3, size: 160, elements: !15) +!15 = !{!16, !21} +!16 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !14, file: !1, line: 4, baseType: !17, size: 128) +!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 128, elements: !19) +!18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!19 = !{!20} +!20 = !DISubrange(count: 4) +!21 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !14, file: !1, line: 5, baseType: !22, size: 8, offset: 128) +!22 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!23 = !DILocation(line: 3, column: 3, scope: !7) +!24 = !DILocation(line: 6, column: 5, scope: !7) +!25 = !DILocation(line: 7, column: 3, scope: !7) +!26 = !DILocation(line: 9, column: 1, scope: !7) +!27 = 
!DILocation(line: 8, column: 3, scope: !7) +!28 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !29, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) +!29 = !DISubroutineType(types: !30) +!30 = !{null, !31} +!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !32, size: 64) +!32 = !DIDerivedType(tag: DW_TAG_const_type, baseType: null) diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-2.ll b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll new file mode 100644 index 0000000000000..bf3c4a7961fbf --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll @@ -0,0 +1,90 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source code: +; struct key_type { +; int a1; +; }; +; typedef struct map_type { +; struct key_type *key; +; } _map_type; +; typedef _map_type __map_type; +; __map_type __attribute__((section(".maps"))) hash_map; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t2.c + +%struct.map_type = type { %struct.key_type* } +%struct.key_type = type { i32 } + +@hash_map = dso_local local_unnamed_addr global %struct.map_type zeroinitializer, section ".maps", align 8, !dbg !0 + +; CHECK: .long 0 # BTF_KIND_PTR(id = 1) +; CHECK-NEXT: .long 33554432 # 0x2000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 2) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 13 # BTF_KIND_INT(id = 3) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 +; CHECK-NEXT: .long 17 # BTF_KIND_TYPEDEF(id = 4) +; CHECK-NEXT: .long 134217728 # 0x8000000 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 28 # BTF_KIND_TYPEDEF(id = 5) +; CHECK-NEXT: .long 134217728 # 0x8000000 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long 38 # BTF_KIND_STRUCT(id = 6) 
+; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long 47 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 51 # BTF_KIND_VAR(id = 7) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 60 # BTF_KIND_DATASEC(id = 8) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long hash_map +; CHECK-NEXT: .long 8 + +; CHECK: .ascii "key_type" # string offset=1 +; CHECK: .ascii "a1" # string offset=10 +; CHECK: .ascii "int" # string offset=13 +; CHECK: .ascii "__map_type" # string offset=17 +; CHECK: .ascii "_map_type" # string offset=28 +; CHECK: .ascii "map_type" # string offset=38 +; CHECK: .ascii "key" # string offset=47 +; CHECK: .ascii "hash_map" # string offset=51 +; CHECK: .ascii ".maps" # string offset=60 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!16, !17, !18} +!llvm.ident = !{!19} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "hash_map", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git b8409c03ed90807f3d49c7d98dceea98cf461f7a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t2.c", directory: "/tmp/home/yhs/tmp1") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "__map_type", file: !3, line: 7, baseType: !7) +!7 = !DIDerivedType(tag: DW_TAG_typedef, name: "_map_type", file: !3, line: 6, baseType: !8) +!8 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "map_type", file: !3, line: 4, size: 64, elements: !9) +!9 = !{!10} +!10 = !DIDerivedType(tag: DW_TAG_member, name: "key", scope: !8, file: !3, line: 5, baseType: !11, size: 64) +!11 
= !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "key_type", file: !3, line: 1, size: 32, elements: !13) +!13 = !{!14} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a1", scope: !12, file: !3, line: 2, baseType: !15, size: 32) +!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!16 = !{i32 7, !"Dwarf Version", i32 4} +!17 = !{i32 2, !"Debug Info Version", i32 3} +!18 = !{i32 1, !"wchar_size", i32 4} +!19 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git b8409c03ed90807f3d49c7d98dceea98cf461f7a)"} diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-3.ll b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll new file mode 100644 index 0000000000000..e05470782ec26 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source code: +; struct key_type { +; int a1; +; }; +; const struct key_type __attribute__((section(".maps"))) hash_map; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t3.c + +%struct.key_type = type { i32 } + +@hash_map = dso_local local_unnamed_addr constant %struct.key_type zeroinitializer, section ".maps", align 4, !dbg !0 + +; CHECK: .long 1 # BTF_KIND_INT(id = 1) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 +; CHECK-NEXT: .long 0 # BTF_KIND_CONST(id = 2) +; CHECK-NEXT: .long 167772160 # 0xa000000 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 5 # BTF_KIND_STRUCT(id = 3) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 14 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 17 # BTF_KIND_VAR(id = 4) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 26 # 
BTF_KIND_DATASEC(id = 5) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long hash_map +; CHECK-NEXT: .long 4 + +; CHECK: .ascii "int" # string offset=1 +; CHECK: .ascii "key_type" # string offset=5 +; CHECK: .ascii "a1" # string offset=14 +; CHECK: .ascii "hash_map" # string offset=17 +; CHECK: .ascii ".maps" # string offset=26 + + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "hash_map", scope: !2, file: !3, line: 4, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 5bd074629f00d4798674b411cf00216f38016483)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t3.c", directory: "/tmp/home/yhs/tmp1") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !7) +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "key_type", file: !3, line: 1, size: 32, elements: !8) +!8 = !{!9} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "a1", scope: !7, file: !3, line: 2, baseType: !10, size: 32) +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{i32 7, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 5bd074629f00d4798674b411cf00216f38016483)"} diff --git a/llvm/test/CodeGen/BPF/BTF/map-def.ll b/llvm/test/CodeGen/BPF/BTF/map-def.ll index cf777880efa17..e12cde3ef98ae 100644 --- a/llvm/test/CodeGen/BPF/BTF/map-def.ll +++ b/llvm/test/CodeGen/BPF/BTF/map-def.ll @@ -28,41 +28,41 @@ ; CHECK-NEXT: .long 168 ; CHECK-NEXT: .long 168 ; CHECK-NEXT: .long 65 -; 
CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 1) -; CHECK-NEXT: .long 67108866 # 0x4000002 -; CHECK-NEXT: .long 16 -; CHECK-NEXT: .long 10 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 14 -; CHECK-NEXT: .long 5 -; CHECK-NEXT: .long 64 # 0x40 -; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 2) +; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 1) ; CHECK-NEXT: .long 33554432 # 0x2000000 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 20 # BTF_KIND_STRUCT(id = 3) +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 2) ; CHECK-NEXT: .long 67108866 # 0x4000002 ; CHECK-NEXT: .long 8 -; CHECK-NEXT: .long 29 -; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 3 ; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 31 -; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 12 +; CHECK-NEXT: .long 3 ; CHECK-NEXT: .long 32 # 0x20 -; CHECK-NEXT: .long 33 # BTF_KIND_INT(id = 4) +; CHECK-NEXT: .long 14 # BTF_KIND_INT(id = 3) ; CHECK-NEXT: .long 16777216 # 0x1000000 ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 5) +; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 4) ; CHECK-NEXT: .long 33554432 # 0x2000000 -; CHECK-NEXT: .long 6 -; CHECK-NEXT: .long 37 # BTF_KIND_INT(id = 6) +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 18 # BTF_KIND_INT(id = 5) ; CHECK-NEXT: .long 16777216 # 0x1000000 ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .long 32 # 0x20 +; CHECK-NEXT: .long 31 # BTF_KIND_STRUCT(id = 6) +; CHECK-NEXT: .long 67108866 # 0x4000002 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 44 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 64 # 0x40 ; CHECK-NEXT: .long 50 # BTF_KIND_VAR(id = 7) ; CHECK-NEXT: .long 234881024 # 0xe000000 -; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 6 ; CHECK-NEXT: .long 1 ; CHECK-NEXT: .long 59 # BTF_KIND_DATASEC(id = 8) ; CHECK-NEXT: .long 251658241 # 0xf000001 @@ -71,21 +71,21 @@ ; CHECK-NEXT: .long hash_map ; CHECK-NEXT: 
.long 16 ; CHECK-NEXT: .byte 0 # string offset=0 -; CHECK-NEXT: .ascii "map_type" # string offset=1 +; CHECK-NEXT: .ascii "key_type" # string offset=1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "key" # string offset=10 +; CHECK-NEXT: .byte 97 # string offset=10 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "value" # string offset=14 +; CHECK-NEXT: .byte 98 # string offset=12 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "key_type" # string offset=20 +; CHECK-NEXT: .ascii "int" # string offset=14 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 97 # string offset=29 +; CHECK-NEXT: .ascii "unsigned int" # string offset=18 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 98 # string offset=31 +; CHECK-NEXT: .ascii "map_type" # string offset=31 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "int" # string offset=33 +; CHECK-NEXT: .ascii "key" # string offset=40 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "unsigned int" # string offset=37 +; CHECK-NEXT: .ascii "value" # string offset=44 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .ascii "hash_map" # string offset=50 ; CHECK-NEXT: .byte 0 diff --git a/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir b/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir new file mode 100644 index 0000000000000..3df834845b404 --- /dev/null +++ b/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir @@ -0,0 +1,13 @@ +# RUN: llc -run-pass=mir-strip-debug -mir-strip-debugify-only=0 -o - %s | FileCheck %s + +# CHECK: name: test +# CHECK: body: | +# CHECK-NEXT: bb.0: +# CHECK-EMPTY: +# CHECK-NEXT: ... + +--- +name: test +body: | + bb.0: +... 
diff --git a/llvm/test/CodeGen/PowerPC/PR33671.ll b/llvm/test/CodeGen/PowerPC/PR33671.ll index a613387f3c93b..9890cc7a20f4a 100644 --- a/llvm/test/CodeGen/PowerPC/PR33671.ll +++ b/llvm/test/CodeGen/PowerPC/PR33671.ll @@ -26,7 +26,7 @@ entry: ret void ; CHECK-LABEL: test2 ; CHECK: addi 3, 3, 8 -; CHECK: lxvx [[LD:[0-9]+]], 0, 3 ; CHECK: addi [[REG:[0-9]+]], 4, 4 +; CHECK: lxvx [[LD:[0-9]+]], 0, 3 ; CHECK: stxvx [[LD]], 0, [[REG]] } diff --git a/llvm/test/CodeGen/PowerPC/botheightreduce.mir b/llvm/test/CodeGen/PowerPC/botheightreduce.mir new file mode 100644 index 0000000000000..72b030273e829 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/botheightreduce.mir @@ -0,0 +1,92 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -run-pass=machine-scheduler -o - %s | FileCheck %s +--- +# Check that machine-scheduler's BotHeightReduce heuristic puts the LD 8 in +# between the final run of MULLDs and the LDXs that feed them, to try to hide +# the latency of the LDXs. 
+name: test +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $x3, $x4 + ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x4 + ; CHECK: [[COPY1:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 + ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[COPY1]], 1 + ; CHECK: [[CMPLDI:%[0-9]+]]:crrc = CMPLDI [[COPY]], 1 + ; CHECK: [[LI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = LI8 1 + ; CHECK: [[ISEL8_:%[0-9]+]]:g8rc = ISEL8 [[COPY]], [[LI8_]], [[CMPLDI]].sub_gt + ; CHECK: MTCTR8loop [[ISEL8_]], implicit-def dead $ctr8 + ; CHECK: [[LI8_1:%[0-9]+]]:g8rc = LI8 0 + ; CHECK: [[LI8_2:%[0-9]+]]:g8rc = LI8 2 + ; CHECK: [[LI8_3:%[0-9]+]]:g8rc = LI8 3 + ; CHECK: [[LI8_4:%[0-9]+]]:g8rc = LI8 5 + ; CHECK: [[LI8_5:%[0-9]+]]:g8rc = LI8 6 + ; CHECK: [[LI8_6:%[0-9]+]]:g8rc = LI8 7 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1 + ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 0, [[ADDI8_]] :: (load 8) + ; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_]] :: (load 8) + ; CHECK: [[LDX1:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_3]] :: (load 8) + ; CHECK: [[LD1:%[0-9]+]]:g8rc = LD 4, [[ADDI8_]] :: (load 8) + ; CHECK: [[LDX2:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_4]] :: (load 8) + ; CHECK: [[LDX3:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_5]] :: (load 8) + ; CHECK: [[LDX4:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_6]] :: (load 8) + ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8) + ; CHECK: [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]] + ; CHECK: [[LD2:%[0-9]+]]:g8rc = LD 8, [[ADDI8_]] :: (load 8) + ; CHECK: [[MULLD1:%[0-9]+]]:g8rc = MULLD [[MULLD]], [[LDX5]] + ; CHECK: [[MULLD2:%[0-9]+]]:g8rc = MULLD [[MULLD1]], [[LDX1]] + ; CHECK: [[MULLD3:%[0-9]+]]:g8rc = MULLD [[MULLD2]], [[LD1]] + ; CHECK: [[MULLD4:%[0-9]+]]:g8rc = MULLD [[MULLD3]], [[LDX2]] + ; CHECK: [[MULLD5:%[0-9]+]]:g8rc = MULLD [[MULLD4]], [[LDX3]] + ; CHECK: 
[[MULLD6:%[0-9]+]]:g8rc = MULLD [[MULLD5]], [[LDX4]] + ; CHECK: [[MADDLD8_:%[0-9]+]]:g8rc = MADDLD8 [[MULLD6]], [[LD2]], [[MADDLD8_]] + ; CHECK: [[COPY2:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_1]] + ; CHECK: BDNZ8 %bb.1, implicit-def dead $ctr8, implicit $ctr8 + ; CHECK: B %bb.2 + ; CHECK: bb.2: + bb.0: + liveins: $x3, $x4 + + %0:g8rc_and_g8rc_nox0 = COPY $x4 + %1:g8rc_and_g8rc_nox0 = COPY $x3 + %2:g8rc_and_g8rc_nox0 = ADDI8 %1, 1 + %3:crrc = CMPLDI %0, 1 + %4:g8rc_and_g8rc_nox0 = LI8 1 + %5:g8rc = ISEL8 %0, %4, %3.sub_gt + MTCTR8loop %5, implicit-def dead $ctr8 + %6:g8rc = LI8 0 + %7:g8rc = LI8 2 + %8:g8rc = LI8 3 + %9:g8rc = LI8 5 + %10:g8rc = LI8 6 + %11:g8rc = LI8 7 + + bb.1: + %12:g8rc = ADDI8 %2, 1 + %13:g8rc = LD 0, %2 :: (load 8) + %14:g8rc = LDX %2, %4 :: (load 8) + %16:g8rc = LDX %2, %8 :: (load 8) + %17:g8rc = LD 4, %2 :: (load 8) + %18:g8rc = LDX %2, %9 :: (load 8) + %19:g8rc = LDX %2, %10 :: (load 8) + %20:g8rc = LDX %2, %11 :: (load 8) + %21:g8rc = LD 8, %2 :: (load 8) + %22:g8rc = MULLD %14, %13 + %15:g8rc = LDX %2, %7 :: (load 8) + %23:g8rc = MULLD %22, %15 + %24:g8rc = MULLD %23, %16 + %25:g8rc = MULLD %24, %17 + %26:g8rc = MULLD %25, %18 + %27:g8rc = MULLD %26, %19 + %28:g8rc = MULLD %27, %20 + %6:g8rc = MADDLD8 %28, %21, %6 + %2:g8rc_and_g8rc_nox0 = COPY %12 + BDNZ8 %bb.1, implicit-def dead $ctr8, implicit $ctr8 + B %bb.2 + + bb.2: +... 
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll index 84bf4032aa34f..3e4a509dc943b 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll @@ -82,71 +82,71 @@ entry: } declare <2 x i64> @llvm.ppc.vsx.xxblendvd(<2 x i64>, <2 x i64>, <2 x i64>) -define <16 x i8> @testVINSBLX(<16 x i8> %a, i64 %b, i64 %c) { +define <16 x i8> @testVINSBLX(<16 x i8> %a, i32 %b, i32 %c) { ; CHECK-LABEL: testVINSBLX: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinsblx v2, r5, r6 ; CHECK-NEXT: blr entry: - %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsblx(<16 x i8> %a, i64 %b, i64 %c) + %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsblx(<16 x i8> %a, i32 %b, i32 %c) ret <16 x i8> %0 } -declare <16 x i8> @llvm.ppc.altivec.vinsblx(<16 x i8>, i64, i64) +declare <16 x i8> @llvm.ppc.altivec.vinsblx(<16 x i8>, i32, i32) -define <16 x i8> @testVINSBRX(<16 x i8> %a, i64 %b, i64 %c) { +define <16 x i8> @testVINSBRX(<16 x i8> %a, i32 %b, i32 %c) { ; CHECK-LABEL: testVINSBRX: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinsbrx v2, r5, r6 ; CHECK-NEXT: blr entry: - %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsbrx(<16 x i8> %a, i64 %b, i64 %c) + %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsbrx(<16 x i8> %a, i32 %b, i32 %c) ret <16 x i8> %0 } -declare <16 x i8> @llvm.ppc.altivec.vinsbrx(<16 x i8>, i64, i64) +declare <16 x i8> @llvm.ppc.altivec.vinsbrx(<16 x i8>, i32, i32) -define <8 x i16> @testVINSHLX(<8 x i16> %a, i64 %b, i64 %c) { +define <8 x i16> @testVINSHLX(<8 x i16> %a, i32 %b, i32 %c) { ; CHECK-LABEL: testVINSHLX: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinshlx v2, r5, r6 ; CHECK-NEXT: blr entry: - %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshlx(<8 x i16> %a, i64 %b, i64 %c) + %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshlx(<8 x i16> %a, i32 %b, i32 %c) ret <8 x i16> %0 } -declare <8 x i16> @llvm.ppc.altivec.vinshlx(<8 x i16>, i64, i64) +declare <8 
x i16> @llvm.ppc.altivec.vinshlx(<8 x i16>, i32, i32) -define <8 x i16> @testVINSHRX(<8 x i16> %a, i64 %b, i64 %c) { +define <8 x i16> @testVINSHRX(<8 x i16> %a, i32 %b, i32 %c) { ; CHECK-LABEL: testVINSHRX: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinshrx v2, r5, r6 ; CHECK-NEXT: blr entry: - %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshrx(<8 x i16> %a, i64 %b, i64 %c) + %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshrx(<8 x i16> %a, i32 %b, i32 %c) ret <8 x i16> %0 } -declare <8 x i16> @llvm.ppc.altivec.vinshrx(<8 x i16>, i64, i64) +declare <8 x i16> @llvm.ppc.altivec.vinshrx(<8 x i16>, i32, i32) -define <4 x i32> @testVINSWLX(<4 x i32> %a, i64 %b, i64 %c) { +define <4 x i32> @testVINSWLX(<4 x i32> %a, i32 %b, i32 %c) { ; CHECK-LABEL: testVINSWLX: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinswlx v2, r5, r6 ; CHECK-NEXT: blr entry: - %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswlx(<4 x i32> %a, i64 %b, i64 %c) + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswlx(<4 x i32> %a, i32 %b, i32 %c) ret <4 x i32> %0 } -declare <4 x i32> @llvm.ppc.altivec.vinswlx(<4 x i32>, i64, i64) +declare <4 x i32> @llvm.ppc.altivec.vinswlx(<4 x i32>, i32, i32) -define <4 x i32> @testVINSWRX(<4 x i32> %a, i64 %b, i64 %c) { +define <4 x i32> @testVINSWRX(<4 x i32> %a, i32 %b, i32 %c) { ; CHECK-LABEL: testVINSWRX: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinswrx v2, r5, r6 ; CHECK-NEXT: blr entry: - %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswrx(<4 x i32> %a, i64 %b, i64 %c) + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswrx(<4 x i32> %a, i32 %b, i32 %c) ret <4 x i32> %0 } -declare <4 x i32> @llvm.ppc.altivec.vinswrx(<4 x i32>, i64, i64) +declare <4 x i32> @llvm.ppc.altivec.vinswrx(<4 x i32>, i32, i32) define <2 x i64> @testVINSDLX(<2 x i64> %a, i64 %b, i64 %c) { ; CHECK-LABEL: testVINSDLX: @@ -232,16 +232,16 @@ entry: } declare <4 x i32> @llvm.ppc.altivec.vinswvrx(<4 x i32>, i64, <4 x i32>) -define <4 x i32> @testVINSW(<4 x i32> %a, i64 %b) { +define <4 x i32> @testVINSW(<4 
x i32> %a, i32 %b) { ; CHECK-LABEL: testVINSW: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vinsw v2, r5, 1 ; CHECK-NEXT: blr entry: - %0 = tail call <4 x i32> @llvm.ppc.altivec.vinsw(<4 x i32> %a, i64 %b, i32 1) + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinsw(<4 x i32> %a, i32 %b, i32 1) ret <4 x i32> %0 } -declare <4 x i32> @llvm.ppc.altivec.vinsw(<4 x i32>, i64, i32 immarg) +declare <4 x i32> @llvm.ppc.altivec.vinsw(<4 x i32>, i32, i32 immarg) define <2 x i64> @testVINSD(<2 x i64> %a, i64 %b) { ; CHECK-LABEL: testVINSD: diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll new file mode 100644 index 0000000000000..d4e71d18c6ebb --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; These test cases aims to test the builtins for the Power10 VSX vector +; instructions introduced in ISA 3.1. 
+ +declare i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8>, i1) + +define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) { +; CHECK-LABEL: test_vec_test_lsbb_all_ones: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtlsbb cr0, v2 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 31 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i1 1) + ret i32 %0 +} + +define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) { +; CHECK-LABEL: test_vec_test_lsbb_all_zeros: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtlsbb cr0, v2 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i1 0) + ret i32 %0 +} diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index ada7c73cd9ed5..11bc2bae9871f 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -343,30 +343,57 @@ test_entry: unreachable } +define dso_local <16 x i8> @no_crash_bitcast(i32 %a) { +; CHECK-P8-LABEL: no_crash_bitcast: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: no_crash_bitcast: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: mtvsrws v2, r3 +; CHECK-P9-NEXT: blr +; +; CHECK-NOVSX-LABEL: no_crash_bitcast: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: addis r4, r2, .LCPI14_0@toc@ha +; CHECK-NOVSX-NEXT: stw r3, -16(r1) +; CHECK-NOVSX-NEXT: addi r3, r1, -16 +; CHECK-NOVSX-NEXT: addi r4, r4, .LCPI14_0@toc@l +; CHECK-NOVSX-NEXT: lvx v3, 0, r3 +; CHECK-NOVSX-NEXT: lvx v2, 0, r4 +; CHECK-NOVSX-NEXT: vperm v2, v3, v3, v2 +; CHECK-NOVSX-NEXT: blr +entry: + %cast = bitcast i32 %a to <4 x i8> + %ret = shufflevector <4 x i8> %cast, <4 x i8> undef, <16 x i32> + ret <16 x i8> %ret +} + define dso_local <4 x i32> 
@replace_undefs_in_splat(<4 x i32> %a) local_unnamed_addr #0 { ; CHECK-P8-LABEL: replace_undefs_in_splat: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha -; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l ; CHECK-P8-NEXT: lvx v3, 0, r3 ; CHECK-P8-NEXT: vmrgow v2, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: replace_undefs_in_splat: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r3 ; CHECK-P9-NEXT: vmrgow v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-NOVSX-LABEL: replace_undefs_in_splat: ; CHECK-NOVSX: # %bb.0: # %entry -; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI14_0@toc@ha -; CHECK-NOVSX-NEXT: addis r4, r2, .LCPI14_1@toc@ha -; CHECK-NOVSX-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-NOVSX-NEXT: addis r4, r2, .LCPI15_1@toc@ha +; CHECK-NOVSX-NEXT: addi r3, r3, .LCPI15_0@toc@l ; CHECK-NOVSX-NEXT: lvx v3, 0, r3 -; CHECK-NOVSX-NEXT: addi r3, r4, .LCPI14_1@toc@l +; CHECK-NOVSX-NEXT: addi r3, r4, .LCPI15_1@toc@l ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v4, v2, v3 ; CHECK-NOVSX-NEXT: blr @@ -378,10 +405,10 @@ entry: define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture readonly %ptr, i32 signext %offset) local_unnamed_addr #0 { ; CHECK-P8-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r5, r2, .LCPI15_0@toc@ha +; CHECK-P8-NEXT: addis r5, r2, .LCPI16_0@toc@ha ; CHECK-P8-NEXT: sldi r4, r4, 2 ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: addi r5, r5, .LCPI15_0@toc@l +; CHECK-P8-NEXT: addi r5, r5, .LCPI16_0@toc@l ; CHECK-P8-NEXT: lxsiwzx v2, r3, r4 ; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: vperm v2, v4, 
v2, v3 @@ -390,11 +417,11 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture re ; CHECK-P9-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: sldi r4, r4, 2 +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: lxsiwzx v2, r3, r4 -; CHECK-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l +; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r3 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: vperm v2, v4, v2, v3 ; CHECK-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/dform-adjust.ll b/llvm/test/CodeGen/PowerPC/dform-adjust.ll index c32655233d860..4884bd248537f 100644 --- a/llvm/test/CodeGen/PowerPC/dform-adjust.ll +++ b/llvm/test/CodeGen/PowerPC/dform-adjust.ll @@ -5,18 +5,18 @@ define dso_local i64 @test1(i8* nocapture readonly %p, i32 signext %count) local ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 5, -13 -; CHECK-NEXT: lxvx 0, 3, 5 -; CHECK-NEXT: li 5, 19 -; CHECK-NEXT: lxvx 1, 3, 5 -; CHECK-NEXT: li 5, 3 ; CHECK-NEXT: li 6, 7 ; CHECK-NEXT: li 7, 11 ; CHECK-NEXT: li 8, 15 -; CHECK-NEXT: mfvsrld 9, 0 -; CHECK-NEXT: ldx 5, 3, 5 +; CHECK-NEXT: lxvx 0, 3, 5 +; CHECK-NEXT: li 5, 19 ; CHECK-NEXT: ldx 6, 3, 6 ; CHECK-NEXT: ldx 7, 3, 7 +; CHECK-NEXT: lxvx 1, 3, 5 +; CHECK-NEXT: li 5, 3 +; CHECK-NEXT: ldx 5, 3, 5 ; CHECK-NEXT: ldx 3, 3, 8 +; CHECK-NEXT: mfvsrld 9, 0 ; CHECK-NEXT: mffprd 8, 0 ; CHECK-NEXT: mfvsrld 10, 1 ; CHECK-NEXT: mffprd 11, 1 diff --git a/llvm/test/CodeGen/PowerPC/early-ret-verify.mir b/llvm/test/CodeGen/PowerPC/early-ret-verify.mir index 5a01aa4effa5a..967e53302607f 100644 --- a/llvm/test/CodeGen/PowerPC/early-ret-verify.mir +++ b/llvm/test/CodeGen/PowerPC/early-ret-verify.mir @@ -40,7 +40,7 @@ body: | ; CHECK-LABEL: testEarlyRet ; CHECK: bb.0.entry: - ; CHECK: BCLR undef renamable $cr5lt, implicit $lr, implicit $rm, implicit $lr, implicit $rm + 
; CHECK: BCLR undef renamable $cr5lt, implicit $lr, implicit $rm ; CHECK: bb.1: ; CHECK: renamable $r3 = IMPLICIT_DEF ; CHECK: renamable $r4 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/PowerPC/early-ret.mir b/llvm/test/CodeGen/PowerPC/early-ret.mir index bfbaa8edeb972..9bc97695bb65c 100644 --- a/llvm/test/CodeGen/PowerPC/early-ret.mir +++ b/llvm/test/CodeGen/PowerPC/early-ret.mir @@ -27,7 +27,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: renamable $cr0 = CMPWI renamable $r3, 0 ; CHECK: BC killed renamable $cr0gt, %bb.1 - ; CHECK: BLR implicit $lr, implicit $rm, implicit $lr, implicit $rm, implicit killed $r3 + ; CHECK: BLR implicit $lr, implicit $rm, implicit killed $r3 ; CHECK: bb.1.entry: ; CHECK: renamable $r3 = ADDI killed renamable $r4, 0 ; CHECK: BLR implicit $lr, implicit $rm, implicit killed $r3 @@ -106,7 +106,7 @@ body: | ; CHECK-LABEL: name: testBCLR ; CHECK: bb.0.entry: ; CHECK: renamable $cr0 = FCMPUS killed renamable $f3, killed renamable $f4 - ; CHECK: BCLR killed renamable $cr0eq, implicit $lr, implicit $rm, implicit $lr, implicit $rm, implicit killed $v2 + ; CHECK: BCLR killed renamable $cr0eq, implicit $lr, implicit $rm, implicit killed $v2 ; CHECK: bb.1.entry: ; CHECK: renamable $cr0 = FCMPUS killed renamable $f1, killed renamable $f2 ; CHECK: BCLRn killed renamable $cr0eq, implicit $lr, implicit $rm, implicit killed $v2 @@ -139,8 +139,8 @@ body: | ; CHECK: bb.0.entry: ; CHECK: renamable $r4 = LI 0 ; CHECK: renamable $cr0 = CMPLWI killed renamable $r4, 0 - ; CHECK: BCCLR 68, renamable $cr0, implicit $lr, implicit $rm, implicit $lr, implicit $rm + ; CHECK: BCCLR 68, renamable $cr0, implicit $lr, implicit $rm ; CHECK: bb.1: - ; CHECK: BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm, implicit $lr, implicit $rm + ; CHECK: BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm ; CHECK: BLR implicit $lr, implicit $rm ... 
diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll index fe1e56b839f8c..86c9930b1f559 100644 --- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -508,9 +508,9 @@ define dso_local void @test_consecutive_i32(<4 x i32> %a, i32* nocapture %b) loc ; CHECK-P9-BE-LABEL: test_consecutive_i32: ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-P9-BE-NEXT: li r3, 4 ; CHECK-P9-BE-NEXT: stfiwx f0, 0, r5 ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 -; CHECK-P9-BE-NEXT: li r3, 4 ; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-BE-NEXT: blr entry: @@ -544,9 +544,9 @@ define dso_local void @test_consecutive_float(<4 x float> %a, float* nocapture % ; CHECK-P9-LABEL: test_consecutive_float: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: li r3, 4 ; CHECK-P9-NEXT: stfiwx f0, 0, r5 ; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3 -; CHECK-P9-NEXT: li r3, 4 ; CHECK-P9-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-NEXT: blr ; @@ -597,9 +597,9 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture ; CHECK-P9-LABEL: test_stores_exceed_vec_size: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l ; CHECK-P9-NEXT: lxvx vs35, 0, r3 -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-NEXT: li r3, 16 ; CHECK-P9-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-NEXT: li r3, 20 @@ -611,10 +611,10 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture ; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size: ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: xxspltw vs0, vs34, 0 -; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs0, 2 ; CHECK-P9-BE-NEXT: li r3, 16 ; CHECK-P9-BE-NEXT: stxsiwx vs34, r5, r3 ; CHECK-P9-BE-NEXT: li r3, 20 +; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, 
vs0, 2 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r5) ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3 @@ -676,9 +676,9 @@ define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-P9-LABEL: test_5_consecutive_stores_of_bytes: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4 +; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 12 -; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 15 ; CHECK-P9-NEXT: li r3, 2 @@ -694,9 +694,9 @@ define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-P9-BE-LABEL: test_5_consecutive_stores_of_bytes: ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13 +; CHECK-P9-BE-NEXT: li r3, 1 ; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 5 -; CHECK-P9-BE-NEXT: li r3, 1 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 2 ; CHECK-P9-BE-NEXT: li r3, 2 @@ -807,9 +807,9 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-P9-NEXT: li r3, 4 ; CHECK-P9-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4 +; CHECK-P9-NEXT: li r3, 5 ; CHECK-P9-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 8 -; CHECK-P9-NEXT: li r3, 5 ; CHECK-P9-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 13 ; CHECK-P9-NEXT: li r3, 6 @@ -848,9 +848,9 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-P9-BE-NEXT: li r3, 4 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13 +; CHECK-P9-BE-NEXT: li r3, 5 ; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 9 -; CHECK-P9-BE-NEXT: li r3, 5 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 4 ; CHECK-P9-BE-NEXT: li r3, 6 @@ -947,8 +947,8 @@ define void 
@test_elements_from_two_vec(<4 x i32> %a, <4 x i32> %b, i32* nocaptu ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3 ; CHECK-P9-BE-NEXT: li r3, 4 -; CHECK-P9-BE-NEXT: stfiwx f0, r7, r3 ; CHECK-P9-BE-NEXT: stxsiwx vs35, 0, r7 +; CHECK-P9-BE-NEXT: stfiwx f0, r7, r3 ; CHECK-P9-BE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 0 @@ -996,9 +996,9 @@ define dso_local void @test_elements_from_three_vec(<4 x float> %a, <4 x float> ; CHECK-P9-BE-LABEL: test_elements_from_three_vec: ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2 +; CHECK-P9-BE-NEXT: li r3, 4 ; CHECK-P9-BE-NEXT: stfiwx f0, 0, r9 ; CHECK-P9-BE-NEXT: xxsldwi vs0, vs35, vs35, 1 -; CHECK-P9-BE-NEXT: li r3, 4 ; CHECK-P9-BE-NEXT: stfiwx f0, r9, r3 ; CHECK-P9-BE-NEXT: li r3, 8 ; CHECK-P9-BE-NEXT: stxsiwx vs36, r9, r3 diff --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll index 094d29e2f258d..956c30f7d8ec6 100644 --- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll +++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll @@ -228,8 +228,8 @@ define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) { ; CHECK-LABEL: testMixedAggregate_03: ; CHECK: # %bb.0: # %entry ; CHECK: mtvsrwa v2, r3 -; CHECK: xscvsdqp v2, v2 -; CHECK: mtvsrdd v3, r6, r5 +; CHECK-DAG: xscvsdqp v2, v2 +; CHECK-DAG: mtvsrdd v3, r6, r5 ; CHECK: xsaddqp v2, v3, v2 ; CHECK: mtvsrd v[[REG1:[0-9]+]], r10 ; CHECK: xscvsdqp v[[REG:[0-9]+]], v[[REG1]] @@ -350,12 +350,12 @@ define fp128 @sum_float128(i32 signext %count, ...) 
{ ; CHECK-NEXT: bltlr cr0 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: addi r3, r1, 40 +; CHECK-NEXT: addi [[REG2:r[0-9]+]], r1, 72 ; CHECK-NEXT: lxvx v3, 0, r3 +; CHECK-NEXT: std [[REG2]], -8(r1) ; CHECK-NEXT: xsaddqp v2, v3, v2 ; CHECK-NEXT: lxv v3, 16(r3) ; CHECK-NEXT: xsaddqp v2, v2, v3 -; CHECK-NEXT: addi [[REG2:r[0-9]+]], r1, 72 -; CHECK-NEXT: std [[REG2]], -8(r1) ; CHECK-NEXT: blr entry: %ap = alloca i8*, align 8 diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll index 2cb3174925457..7f0c13a23ffc1 100644 --- a/llvm/test/CodeGen/PowerPC/f128-conv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll @@ -444,10 +444,10 @@ define void @qpConv2dp_03(double* nocapture %res, i32 signext %idx) { ; CHECK-LABEL: qpConv2dp_03: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r5, r2, .LC7@toc@ha +; CHECK-NEXT: sldi r4, r4, 3 ; CHECK-NEXT: ld r5, .LC7@toc@l(r5) ; CHECK-NEXT: lxvx v2, 0, r5 ; CHECK-NEXT: xscvqpdp v2, v2 -; CHECK-NEXT: sldi r4, r4, 3 ; CHECK-NEXT: stxsdx v2, r3, r4 ; CHECK-NEXT: blr entry: @@ -517,11 +517,11 @@ define void @qpConv2sp_03(float* nocapture %res, i32 signext %idx) { ; CHECK-LABEL: qpConv2sp_03: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r5, r2, .LC7@toc@ha +; CHECK-NEXT: sldi r4, r4, 2 ; CHECK-NEXT: ld r5, .LC7@toc@l(r5) ; CHECK-NEXT: lxv v2, 48(r5) ; CHECK-NEXT: xscvqpdpo v2, v2 ; CHECK-NEXT: xsrsp f0, v2 -; CHECK-NEXT: sldi r4, r4, 2 ; CHECK-NEXT: stfsx f0, r3, r4 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll index 8b2db6b035109..028a9855a67bb 100644 --- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll +++ b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll @@ -153,13 +153,13 @@ define fp128 @mixParam_02(fp128 %p1, double %p2, i64* nocapture %p3, ; CHECK: # %bb.0: # %entry ; CHECK: lwz r3, 96(r1) ; CHECK: add r4, r7, r9 +; CHECK: xscpsgndp v[[REG0:[0-9]+]], f1, f1 ; CHECK: add r4, r4, r10 +; CHECK: xscvdpqp v[[REG0]], 
v[[REG0]] ; CHECK: add r3, r4, r3 ; CHECK: clrldi r3, r3, 32 ; CHECK: std r3, 0(r6) ; CHECK: lxv v[[REG1:[0-9]+]], 0(r8) -; CHECK: xscpsgndp v[[REG0:[0-9]+]], f1, f1 -; CHECK: xscvdpqp v[[REG0]], v[[REG0]] ; CHECK: xsaddqp v2, v[[REG1]], v2 ; CHECK: xsaddqp v2, v2, v3 ; CHECK-NEXT: blr @@ -185,13 +185,13 @@ define fastcc fp128 @mixParam_02f(fp128 %p1, double %p2, i64* nocapture %p3, ; CHECK-LABEL: mixParam_02f: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: add r4, r4, r6 +; CHECK-NEXT: xscpsgndp v[[REG0:[0-9]+]], f1, f1 ; CHECK-NEXT: add r4, r4, r7 +; CHECK-NEXT: xscvdpqp v[[REG0]], v[[REG0]] ; CHECK-NEXT: add r4, r4, r8 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-DAG: std r4, 0(r3) ; CHECK-DAG: lxv v[[REG1:[0-9]+]], 0(r5) -; CHECK-NEXT: xscpsgndp v[[REG0:[0-9]+]], f1, f1 -; CHECK-NEXT: xscvdpqp v[[REG0]], v[[REG0]] ; CHECK-NEXT: xsaddqp v2, v[[REG1]], v2 ; CHECK-NEXT: xsaddqp v2, v2, v[[REG0]] ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll index caeff71553343..095361716438a 100644 --- a/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll +++ b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll @@ -32,10 +32,19 @@ define signext i32 @test() nounwind { ; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -192(1) ; CHECK-NEXT: addis 3, 2, a1@toc@ha +; CHECK-NEXT: addis 5, 2, a16@toc@ha +; CHECK-NEXT: addis 6, 2, a17@toc@ha +; CHECK-NEXT: addis 4, 2, a15@toc@ha ; CHECK-NEXT: lfd 1, a1@toc@l(3) ; CHECK-NEXT: addis 3, 2, a2@toc@ha +; CHECK-NEXT: addi 5, 5, a16@toc@l +; CHECK-NEXT: addi 6, 6, a17@toc@l +; CHECK-NEXT: ld 4, a15@toc@l(4) ; CHECK-NEXT: lfd 2, a2@toc@l(3) ; CHECK-NEXT: addis 3, 2, a3@toc@ha +; CHECK-NEXT: lxvx 34, 0, 6 +; CHECK-NEXT: lxvx 0, 0, 5 +; CHECK-NEXT: li 5, 152 ; CHECK-NEXT: lfd 3, a3@toc@l(3) ; CHECK-NEXT: addis 3, 2, a4@toc@ha ; CHECK-NEXT: lfd 4, a4@toc@l(3) @@ -54,17 +63,8 @@ define signext i32 @test() nounwind { ; CHECK-NEXT: addis 3, 2, a11@toc@ha ; CHECK-NEXT: lfd 11, 
a11@toc@l(3) ; CHECK-NEXT: addis 3, 2, a12@toc@ha -; CHECK-NEXT: addis 5, 2, a16@toc@ha -; CHECK-NEXT: addis 6, 2, a17@toc@ha -; CHECK-NEXT: addi 6, 6, a17@toc@l -; CHECK-NEXT: lxvx 34, 0, 6 ; CHECK-NEXT: lfd 12, a12@toc@l(3) ; CHECK-NEXT: addis 3, 2, a13@toc@ha -; CHECK-NEXT: addi 5, 5, a16@toc@l -; CHECK-NEXT: addis 4, 2, a15@toc@ha -; CHECK-NEXT: lxvx 0, 0, 5 -; CHECK-NEXT: ld 4, a15@toc@l(4) -; CHECK-NEXT: li 5, 152 ; CHECK-NEXT: lfd 13, a13@toc@l(3) ; CHECK-NEXT: addis 3, 2, a14@toc@ha ; CHECK-NEXT: ld 3, a14@toc@l(3) diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll new file mode 100644 index 0000000000000..ab806a19c158e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \ +; RUN: -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \ +; RUN: -check-prefix=SPE + +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) + +declare 
float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata) + +define i32 @d_to_i32(double %m) #0 { +; SPE-LABEL: d_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdctsiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @d_to_i64(double %m) #0 { +; SPE-LABEL: d_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl __fixdfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @d_to_u64(double %m) #0 { +; SPE-LABEL: d_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl __fixunsdfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 
@d_to_u32(double %m) #0 { +; SPE-LABEL: d_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdctuiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define signext i32 @f_to_i32(float %m) #0 { +; SPE-LABEL: f_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efsctsiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @f_to_i64(float %m) #0 { +; SPE-LABEL: f_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixsfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @f_to_u64(float %m) #0 { +; SPE-LABEL: f_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunssfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @f_to_u32(float %m) #0 { +; SPE-LABEL: f_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efsctuiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define double @i32_to_d(i32 signext %m) #0 { +; SPE-LABEL: i32_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfsi r4, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: 
# kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +define double @i64_to_d(i64 %m) #0 { +; SPE-LABEL: i64_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatdidf +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +define double @u32_to_d(i32 zeroext %m) #0 { +; SPE-LABEL: u32_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfui r4, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +define double @u64_to_d(i64 %m) #0 { +; SPE-LABEL: u64_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatundidf +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call double 
@llvm.experimental.constrained.uitofp.f64.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +define float @i32_to_f(i32 signext %m) #0 { +; SPE-LABEL: i32_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efscfsi r3, r3 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +define float @i64_to_f(i64 %m) #0 { +; SPE-LABEL: i64_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatdisf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +define float @u32_to_f(i32 zeroext %m) #0 { +; SPE-LABEL: u32_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efscfui r3, r3 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +define float @u64_to_f(i64 %m) #0 { +; SPE-LABEL: u64_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatundisf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll index caaa4fa0db852..48a10eda1cf1d 100644 --- 
a/llvm/test/CodeGen/PowerPC/funnel-shift.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll @@ -29,6 +29,20 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { ret i32 %f } +define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { +; CHECK-LABEL: fshl_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: andi. 5, 5, 63 +; CHECK-NEXT: subfic 6, 5, 64 +; CHECK-NEXT: sld 5, 3, 5 +; CHECK-NEXT: srd 4, 4, 6 +; CHECK-NEXT: or 4, 5, 4 +; CHECK-NEXT: iseleq 3, 3, 4 +; CHECK-NEXT: blr + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z) + ret i64 %f +} + ; Verify that weird types are minimally supported. declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { @@ -135,6 +149,20 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { ret i32 %f } +define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) { +; CHECK-LABEL: fshr_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: andi. 5, 5, 63 +; CHECK-NEXT: subfic 6, 5, 64 +; CHECK-NEXT: srd 5, 4, 5 +; CHECK-NEXT: sld 3, 3, 6 +; CHECK-NEXT: or 3, 3, 5 +; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: blr + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z) + ret i64 %f +} + ; Verify that weird types are minimally supported. 
declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll index bb15e52e1b029..9977b6b33560d 100644 --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -697,10 +697,10 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; CHECK-NEXT: lhz r3, 0(r3) ; CHECK-NEXT: xxmrghd vs0, vs0, vs1 ; CHECK-NEXT: mtfprwz f3, r3 +; CHECK-NEXT: xvcvdpsp vs35, vs0 ; CHECK-NEXT: xscvhpdp f3, f3 ; CHECK-NEXT: xxmrghd vs2, vs2, vs3 ; CHECK-NEXT: xvcvdpsp vs34, vs2 -; CHECK-NEXT: xvcvdpsp vs35, vs0 ; CHECK-NEXT: vmrgew v2, v3, v2 ; CHECK-NEXT: blr ; @@ -906,12 +906,12 @@ define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { ; CHECK-LABEL: test_trunc32_vec4: ; CHECK: # %bb.0: ; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 ; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvspdpn f1, vs1 ; CHECK-NEXT: xscvdphp f0, f0 ; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: xxswapd vs0, vs34 -; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 -; CHECK-NEXT: xscvspdpn f1, vs1 ; CHECK-NEXT: xscvspdpn f0, vs0 ; CHECK-NEXT: xscvdphp f0, f0 ; CHECK-NEXT: xscvdphp f1, f1 @@ -920,8 +920,8 @@ define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { ; CHECK-NEXT: xscvdphp f1, f1 ; CHECK-NEXT: sth r4, 4(r5) ; CHECK-NEXT: mffprwz r4, f0 -; CHECK-NEXT: sth r4, 2(r5) ; CHECK-NEXT: sth r3, 0(r5) +; CHECK-NEXT: sth r4, 2(r5) ; CHECK-NEXT: mffprwz r6, f1 ; CHECK-NEXT: sth r6, 6(r5) ; CHECK-NEXT: blr @@ -1059,10 +1059,10 @@ define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 { ; CHECK-NEXT: xscvdphp f1, vs34 ; CHECK-NEXT: mffprwz r4, f1 ; CHECK-NEXT: xscvdphp f1, vs35 +; CHECK-NEXT: sth r3, 0(r7) ; CHECK-NEXT: sth r4, 2(r7) ; CHECK-NEXT: mffprwz r4, f0 ; CHECK-NEXT: sth r4, 4(r7) -; CHECK-NEXT: sth r3, 0(r7) ; CHECK-NEXT: mffprwz 
r5, f1 ; CHECK-NEXT: sth r5, 6(r7) ; CHECK-NEXT: blr @@ -1169,8 +1169,8 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { ; CHECK-LABEL: test_sitofp_fadd_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: mtfprwa f1, r3 -; CHECK-NEXT: xscvsxdsp f1, f1 ; CHECK-NEXT: lhz r4, 0(r4) +; CHECK-NEXT: xscvsxdsp f1, f1 ; CHECK-NEXT: mtfprwz f0, r4 ; CHECK-NEXT: xscvhpdp f0, f0 ; CHECK-NEXT: xscvdphp f1, f1 diff --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll index 985dee83a62d8..6b57ab1507dde 100644 --- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll +++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll @@ -132,8 +132,8 @@ define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) { ; CHECK-P9-BE-LABEL: load_swap11: ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l ; CHECK-P9-BE-NEXT: lxv v2, 0(r4) +; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-BE-NEXT: blr @@ -208,8 +208,8 @@ define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2){ ; CHECK-P9-BE-LABEL: load_swap21: ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l ; CHECK-P9-BE-NEXT: lxv v2, 0(r4) +; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-BE-NEXT: blr @@ -382,8 +382,8 @@ define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) { ; CHECK-P9-BE-LABEL: load_swap51: ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha -; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l ; CHECK-P9-BE-NEXT: lxv v2, 0(r4) +; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-BE-NEXT: blr diff 
--git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll index a6f674e129131..12c9dfec50555 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -23,11 +23,11 @@ define i64 @test_no_prep(i8* %0, i32 signext %1) { ; CHECK: .LBB0_2: # ; CHECK-NEXT: ldx r9, r3, r6 ; CHECK-NEXT: ldx r10, r3, r7 -; CHECK-NEXT: mulld r9, r10, r9 ; CHECK-NEXT: ldx r11, r3, r8 -; CHECK-NEXT: mulld r9, r9, r11 ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: mulld r9, r10, r9 +; CHECK-NEXT: mulld r9, r9, r11 ; CHECK-NEXT: maddld r5, r9, r12, r5 ; CHECK-NEXT: bdnz .LBB0_2 %3 = sext i32 %1 to i64 @@ -87,11 +87,11 @@ define i64 @test_ds_prep(i8* %0, i32 signext %1) { ; CHECK: .LBB1_2: # ; CHECK-NEXT: ldx r9, r6, r7 ; CHECK-NEXT: ld r10, 0(r6) -; CHECK-NEXT: mulld r9, r10, r9 ; CHECK-NEXT: ldx r11, r6, r5 -; CHECK-NEXT: mulld r9, r9, r11 ; CHECK-NEXT: addi r8, r6, 1 ; CHECK-NEXT: ld r6, 4(r6) +; CHECK-NEXT: mulld r9, r10, r9 +; CHECK-NEXT: mulld r9, r9, r11 ; CHECK-NEXT: maddld r3, r9, r6, r3 ; CHECK-NEXT: mr r6, r8 ; CHECK-NEXT: bdnz .LBB1_2 @@ -162,22 +162,22 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { ; CHECK: .LBB2_2: # ; CHECK-NEXT: ldx r12, r9, r6 ; CHECK-NEXT: ld r0, 0(r9) -; CHECK-NEXT: mulld r12, r0, r12 +; CHECK-NEXT: ldx r30, r9, r5 +; CHECK-NEXT: ldx r29, r9, r7 ; CHECK-NEXT: addi r11, r9, 1 -; CHECK-NEXT: ldx r30, r9, r7 -; CHECK-NEXT: ld r29, 4(r9) -; CHECK-NEXT: ldx r28, r9, r8 -; CHECK-NEXT: ld r27, 12(r9) -; CHECK-NEXT: ld r26, 8(r9) -; CHECK-NEXT: ldx r25, r9, r10 -; CHECK-NEXT: ldx r9, r9, r5 -; CHECK-NEXT: mulld r9, r12, r9 -; CHECK-NEXT: mulld r9, r9, r30 -; CHECK-NEXT: mulld r9, r9, r29 -; CHECK-NEXT: mulld r9, r9, r28 -; CHECK-NEXT: mulld r9, r9, r27 -; CHECK-NEXT: mulld r9, r9, r26 -; CHECK-NEXT: maddld r3, r9, r25, r3 +; CHECK-NEXT: mulld r12, r0, r12 +; CHECK-NEXT: ld r28, 4(r9) +; CHECK-NEXT: 
ldx r27, r9, r8 +; CHECK-NEXT: ld r26, 12(r9) +; CHECK-NEXT: ld r25, 8(r9) +; CHECK-NEXT: ldx r9, r9, r10 +; CHECK-NEXT: mulld r12, r12, r30 +; CHECK-NEXT: mulld r12, r12, r29 +; CHECK-NEXT: mulld r12, r12, r28 +; CHECK-NEXT: mulld r12, r12, r27 +; CHECK-NEXT: mulld r12, r12, r26 +; CHECK-NEXT: mulld r12, r12, r25 +; CHECK-NEXT: maddld r3, r12, r9, r3 ; CHECK-NEXT: mr r9, r11 ; CHECK-NEXT: bdnz .LBB2_2 %3 = sext i32 %1 to i64 @@ -257,10 +257,10 @@ define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) { ; CHECK: .LBB3_2: # ; CHECK-NEXT: ldu r8, 4(r3) ; CHECK-NEXT: ldx r9, r3, r7 -; CHECK-NEXT: mulld r8, r8, r9 ; CHECK-NEXT: ldx r10, r3, r6 -; CHECK-NEXT: mulld r8, r8, r10 ; CHECK-NEXT: ld r11, 4(r3) +; CHECK-NEXT: mulld r8, r8, r9 +; CHECK-NEXT: mulld r8, r8, r10 ; CHECK-NEXT: maddld r5, r8, r11, r5 ; CHECK-NEXT: bdnz .LBB3_2 %3 = sext i32 %1 to i64 @@ -391,21 +391,21 @@ define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) { ; CHECK: .LBB5_2: # ; CHECK-NEXT: ld r8, 0(r3) ; CHECK-NEXT: ldx r9, r3, r7 -; CHECK-NEXT: mulld r8, r9, r8 -; CHECK-NEXT: ld r9, 4(r3) -; CHECK-NEXT: mulld r8, r8, r9 -; CHECK-NEXT: ld r10, 8(r3) +; CHECK-NEXT: ld r10, 4(r3) +; CHECK-NEXT: ld r11, 8(r3) ; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: mulld r8, r9, r8 +; CHECK-NEXT: ld r12, 0(r4) +; CHECK-NEXT: ldx r0, r4, r7 +; CHECK-NEXT: ld r30, 4(r4) +; CHECK-NEXT: ld r9, 8(r4) +; CHECK-NEXT: addi r4, r4, 1 ; CHECK-NEXT: mulld r8, r8, r10 -; CHECK-NEXT: ld r11, 0(r4) ; CHECK-NEXT: mulld r8, r8, r11 -; CHECK-NEXT: ldx r12, r4, r7 ; CHECK-NEXT: mulld r8, r8, r12 -; CHECK-NEXT: ld r0, 4(r4) ; CHECK-NEXT: mulld r8, r8, r0 -; CHECK-NEXT: ld r30, 8(r4) -; CHECK-NEXT: addi r4, r4, 1 -; CHECK-NEXT: maddld r6, r8, r30, r6 +; CHECK-NEXT: mulld r8, r8, r30 +; CHECK-NEXT: maddld r6, r8, r9, r6 ; CHECK-NEXT: bdnz .LBB5_2 %4 = sext i32 %2 to i64 %5 = icmp eq i32 %2, 0 @@ -710,10 +710,10 @@ define float @test_ds_combine_float_int(i8* %0, i32 signext %1) { ; 
CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: xscvuxdsp f4, f4 ; CHECK-NEXT: lfs f2, 20(r3) -; CHECK-NEXT: xsmulsp f0, f0, f4 -; CHECK-NEXT: xsmulsp f0, f2, f0 ; CHECK-NEXT: lfs f3, 60(r3) ; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: xsmulsp f0, f0, f4 +; CHECK-NEXT: xsmulsp f0, f2, f0 ; CHECK-NEXT: xsmulsp f0, f3, f0 ; CHECK-NEXT: xsaddsp f1, f1, f0 ; CHECK-NEXT: bdnz .LBB8_2 diff --git a/llvm/test/CodeGen/PowerPC/machine-pre.ll b/llvm/test/CodeGen/PowerPC/machine-pre.ll index ff1e2cf70a6f9..98ed27db965b6 100644 --- a/llvm/test/CodeGen/PowerPC/machine-pre.ll +++ b/llvm/test/CodeGen/PowerPC/machine-pre.ll @@ -109,10 +109,10 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) nounwind { ; CHECK-P9-NEXT: b .LBB1_2 ; CHECK-P9-NEXT: .LBB1_7: # %while.end ; CHECK-P9-NEXT: lis r3, -13108 -; CHECK-P9-NEXT: ori r3, r3, 52429 -; CHECK-P9-NEXT: mullw r3, r28, r3 ; CHECK-P9-NEXT: lis r4, 13107 +; CHECK-P9-NEXT: ori r3, r3, 52429 ; CHECK-P9-NEXT: ori r4, r4, 13108 +; CHECK-P9-NEXT: mullw r3, r28, r3 ; CHECK-P9-NEXT: cmplw r3, r4 ; CHECK-P9-NEXT: blt cr0, .LBB1_9 ; CHECK-P9-NEXT: # %bb.8: # %if.then8 diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index fea72eb596875..8617422aba7ea 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1397,10 +1397,10 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64LE9-NEXT: li 3, 0 ; PC64LE9-NEXT: xxlxor 2, 2, 2 ; PC64LE9-NEXT: xxlxor 4, 4, 4 +; PC64LE9-NEXT: mr 30, 4 ; PC64LE9-NEXT: std 3, 8(4) ; PC64LE9-NEXT: fmr 1, 31 ; PC64LE9-NEXT: fmr 3, 31 -; PC64LE9-NEXT: mr 30, 4 ; PC64LE9-NEXT: stfd 31, 0(4) ; PC64LE9-NEXT: bl __gcc_qadd ; PC64LE9-NEXT: nop diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll index c48ba256094fc..b3ccc4e6646f8 100644 --- 
a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll @@ -13,7 +13,7 @@ entry: store ppc_fp128 %0, ppc_fp128* @g, align 16 ret void } -; CHECK: @callee +; CHECK-LABEL: @callee ; CHECK: ld [[REG:[0-9]+]], .LC ; CHECK: stfd 2, 8([[REG]]) ; CHECK: stfd 1, 0([[REG]]) @@ -25,7 +25,7 @@ entry: call void @test(ppc_fp128 %0) ret void } -; CHECK: @caller +; CHECK-LABEL: @caller ; CHECK: ld [[REG:[0-9]+]], .LC ; CHECK: lfd 2, 8([[REG]]) ; CHECK: lfd 1, 0([[REG]]) @@ -42,7 +42,7 @@ entry: ; CHECK: .long 0x3f800000 ; CHECK: .LCPI[[LC]]_1: ; CHECK: .long 0 -; CHECK: @caller_const +; CHECK-LABEL: @caller_const ; CHECK: addis [[REG0:[0-9]+]], 2, .LCPI[[LC]]_0@toc@ha ; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI[[LC]]_1@toc@ha ; CHECK: lfs 1, .LCPI[[LC]]_0@toc@l([[REG0]]) @@ -54,7 +54,7 @@ entry: %0 = load ppc_fp128, ppc_fp128* @g, align 16 ret ppc_fp128 %0 } -; CHECK: @result +; CHECK-LABEL: @result ; CHECK: ld [[REG:[0-9]+]], .LC ; CHECK: lfd 1, 0([[REG]]) ; CHECK: lfd 2, 8([[REG]]) @@ -66,7 +66,7 @@ entry: store ppc_fp128 %call, ppc_fp128* @g, align 16 ret void } -; CHECK: @use_result +; CHECK-LABEL: @use_result ; CHECK: bl test_result ; CHECK: ld [[REG:[0-9]+]], .LC ; CHECK: stfd 2, 8([[REG]]) @@ -81,7 +81,7 @@ entry: tail call void @test(ppc_fp128 %call) ret void } -; CHECK: @caller_result +; CHECK-LABEL: @caller_result ; CHECK: bl test_result ; CHECK-NEXT: nop ; CHECK-NEXT: bl test @@ -92,7 +92,7 @@ entry: %0 = bitcast ppc_fp128 %x to i128 ret i128 %0 } -; CHECK: @convert_from +; CHECK-LABEL: @convert_from ; CHECK: stfd 1, [[OFF1:.*]](1) ; CHECK: stfd 2, [[OFF2:.*]](1) ; CHECK: ld 3, [[OFF1]](1) @@ -104,7 +104,7 @@ entry: %0 = bitcast i128 %x to ppc_fp128 ret ppc_fp128 %0 } -; CHECK: convert_to: +; CHECK-LABEL: convert_to: ; CHECK-DAG: std 3, [[OFF1:.*]](1) ; CHECK-DAG: std 4, [[OFF2:.*]](1) ; CHECK: lfd 1, [[OFF1]](1) @@ -118,7 +118,7 @@ entry: ret ppc_fp128 %0 } -; CHECK: convert_to2: +; CHECK-LABEL: convert_to2: ; CHECK: std 3, 
[[OFF1:.*]](1) ; CHECK: std 5, [[OFF2:.*]](1) ; CHECK: lfd 1, [[OFF1]](1) @@ -131,7 +131,7 @@ entry: %conv = fptrunc ppc_fp128 %cast to double ret double %conv } -; CHECK: @convert_vector +; CHECK-LABEL: @convert_vector ; CHECK: addi [[REG:[0-9]+]], 1, [[OFF:.*]] ; CHECK: stvx 2, 0, [[REG]] ; CHECK: lfd 1, [[OFF]](1) @@ -148,7 +148,7 @@ entry: %conv = fptrunc ppc_fp128 %arg to double ret double %conv } -; CHECK: @vararg +; CHECK-LABEL: @vararg ; CHECK: lfd 1, 0({{[0-9]+}}) ; CHECK: blr diff --git a/llvm/test/CodeGen/PowerPC/pr45432.ll b/llvm/test/CodeGen/PowerPC/pr45432.ll index 9adc3c1551bca..7ce996f893f58 100644 --- a/llvm/test/CodeGen/PowerPC/pr45432.ll +++ b/llvm/test/CodeGen/PowerPC/pr45432.ll @@ -14,8 +14,8 @@ define dso_local void @h() local_unnamed_addr #0 { ; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -64(1) ; CHECK-NEXT: addis 3, 2, g@toc@ha -; CHECK-NEXT: lwz 3, g@toc@l(3) ; CHECK-NEXT: std 30, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: lwz 3, g@toc@l(3) ; CHECK-NEXT: extswsli 30, 3, 2 ; CHECK-NEXT: addis 3, 2, f@got@tlsld@ha ; CHECK-NEXT: addi 3, 3, f@got@tlsld@l diff --git a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll index 6ee0e3a00335e..ce6d19ed24fb6 100644 --- a/llvm/test/CodeGen/PowerPC/pr45448.ll +++ b/llvm/test/CodeGen/PowerPC/pr45448.ll @@ -20,13 +20,13 @@ define hidden void @julia_tryparse_internal_45896() #0 { ; CHECK-NEXT: .LBB0_6: # %L1057.preheader ; CHECK-NEXT: .LBB0_7: # %L670 ; CHECK-NEXT: lis r5, 4095 -; CHECK-NEXT: ori r5, r5, 65533 -; CHECK-NEXT: sldi r5, r5, 4 ; CHECK-NEXT: cmpdi r3, 0 ; CHECK-NEXT: sradi r4, r3, 63 +; CHECK-NEXT: ori r5, r5, 65533 +; CHECK-NEXT: crnot 4*cr5+gt, eq +; CHECK-NEXT: sldi r5, r5, 4 ; CHECK-NEXT: mulhdu r3, r3, r5 ; CHECK-NEXT: maddld r6, r4, r5, r3 -; CHECK-NEXT: crnot 4*cr5+gt, eq ; CHECK-NEXT: cmpld r6, r3 ; CHECK-NEXT: mulld r3, r4, r5 ; CHECK-NEXT: cmpldi cr1, r3, 0 diff --git a/llvm/test/CodeGen/PowerPC/pr45628.ll b/llvm/test/CodeGen/PowerPC/pr45628.ll index 
e17e56d2db605..5ea3d05db5b55 100644 --- a/llvm/test/CodeGen/PowerPC/pr45628.ll +++ b/llvm/test/CodeGen/PowerPC/pr45628.ll @@ -223,9 +223,9 @@ define <1 x i128> @rotl_28(<1 x i128> %num) { ; P9-NOVSX-NEXT: rldimi r5, r3, 28, 0 ; P9-NOVSX-NEXT: rotldi r3, r3, 28 ; P9-NOVSX-NEXT: rldimi r3, r4, 28, 0 +; P9-NOVSX-NEXT: std r5, -8(r1) ; P9-NOVSX-NEXT: std r3, -16(r1) ; P9-NOVSX-NEXT: addi r3, r1, -16 -; P9-NOVSX-NEXT: std r5, -8(r1) ; P9-NOVSX-NEXT: lvx v2, 0, r3 ; P9-NOVSX-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll new file mode 100644 index 0000000000000..2c0af89500998 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck \ +; RUN: -check-prefix=CHECK-LE %s + +define void @foo(i32 %vla_size) #0 { +; CHECK-LE-LABEL: foo: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: std r31, -8(r1) +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 +; CHECK-LE-NEXT: clrldi r0, r12, 53 +; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdu r12, -2048(r1) +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r30 +; CHECK-LE-NEXT: .cfi_offset r31, -8 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: li r6, -4096 +; CHECK-LE-NEXT: ld r4, 0(r1) +; CHECK-LE-NEXT: mr r31, r1 +; CHECK-LE-NEXT: addi r3, r3, 15 +; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 +; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 +; CHECK-LE-NEXT: neg r5, r3 +; CHECK-LE-NEXT: li r3, -2048 +; CHECK-LE-NEXT: divd r7, r5, r6 +; CHECK-LE-NEXT: and r3, r5, r3 +; CHECK-LE-NEXT: add r3, r1, r3 +; CHECK-LE-NEXT: mulld r6, r7, r6 +; CHECK-LE-NEXT: sub r5, r5, r6 +; CHECK-LE-NEXT: stdux 
r4, r1, r5 +; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: beq cr0, .LBB0_2 +; CHECK-LE-NEXT: .LBB0_1: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdu r4, -4096(r1) +; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: bne cr0, .LBB0_1 +; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: addi r3, r1, 2048 +; CHECK-LE-NEXT: lbz r3, 0(r3) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: ld r31, -8(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: blr +entry: + %0 = zext i32 %vla_size to i64 + %vla = alloca i8, i64 %0, align 2048 + %1 = load volatile i8, i8* %vla, align 2048 + ret void +} + +attributes #0 = { "probe-stack"="inline-asm" } diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 6584cb74bdb51..93d0d296e51a1 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -13,29 +13,29 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxsd v5, 0(r5) ; CHECK-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: addi r5, r5, .LCPI0_0@toc@l ; CHECK-NEXT: lxvx v2, 0, r5 ; CHECK-NEXT: addis r5, r2, .LCPI0_1@toc@ha ; CHECK-NEXT: addi r5, r5, .LCPI0_1@toc@l ; CHECK-NEXT: lxvx v4, 0, r5 ; CHECK-NEXT: li r5, 4 -; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: vperm v0, v3, v5, v2 ; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: vperm v1, v3, v5, v4 -; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: xvnegsp v5, v0 ; CHECK-NEXT: xvnegsp v0, v1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader ; CHECK-NEXT: # ; CHECK-NEXT: lxsd v1, 0(r3) +; CHECK-NEXT: add r7, r3, r4 ; CHECK-NEXT: vperm v6, v3, v1, v4 ; CHECK-NEXT: vperm v1, v3, v1, v2 ; CHECK-NEXT: xvnegsp v1, v1 ; CHECK-NEXT: xvnegsp v6, v6 -; CHECK-NEXT: add r7, r3, r4 ; CHECK-NEXT: vabsduw v1, v1, v5 ; CHECK-NEXT: vabsduw v6, v6, v0 ; CHECK-NEXT: vadduwm v1, v6, v1 
@@ -47,10 +47,11 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK-NEXT: vextuwrx r3, r5, v1 ; CHECK-NEXT: vperm v7, v3, v6, v4 ; CHECK-NEXT: vperm v6, v3, v6, v2 +; CHECK-NEXT: add r6, r3, r6 +; CHECK-NEXT: add r3, r7, r4 ; CHECK-NEXT: xvnegsp v6, v6 ; CHECK-NEXT: xvnegsp v1, v7 ; CHECK-NEXT: vabsduw v6, v6, v5 -; CHECK-NEXT: add r6, r3, r6 ; CHECK-NEXT: vabsduw v1, v1, v0 ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: xxswapd v6, v1 @@ -58,7 +59,6 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK-NEXT: xxspltw v6, v1, 2 ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: vextuwrx r8, r5, v1 -; CHECK-NEXT: add r3, r7, r4 ; CHECK-NEXT: add r6, r8, r6 ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup @@ -69,25 +69,26 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: lfd f0, 0(r5) ; P9BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; P9BE-NEXT: xxlxor v3, v3, v3 +; P9BE-NEXT: li r6, 0 ; P9BE-NEXT: addi r5, r5, .LCPI0_0@toc@l ; P9BE-NEXT: lxvx v2, 0, r5 ; P9BE-NEXT: addis r5, r2, .LCPI0_1@toc@ha +; P9BE-NEXT: xxlor v5, vs0, vs0 ; P9BE-NEXT: addi r5, r5, .LCPI0_1@toc@l ; P9BE-NEXT: lxvx v4, 0, r5 ; P9BE-NEXT: li r5, 4 -; P9BE-NEXT: xxlor v5, vs0, vs0 -; P9BE-NEXT: xxlxor v3, v3, v3 ; P9BE-NEXT: vperm v0, v3, v5, v2 ; P9BE-NEXT: mtctr r5 ; P9BE-NEXT: li r5, 0 ; P9BE-NEXT: vperm v1, v3, v5, v4 -; P9BE-NEXT: li r6, 0 ; P9BE-NEXT: xvnegsp v5, v0 ; P9BE-NEXT: xvnegsp v0, v1 ; P9BE-NEXT: .p2align 4 ; P9BE-NEXT: .LBB0_1: # %for.cond1.preheader ; P9BE-NEXT: # ; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: add r7, r3, r4 ; P9BE-NEXT: xxlor v1, vs0, vs0 ; P9BE-NEXT: lfdx f0, r3, r4 ; P9BE-NEXT: vperm v6, v3, v1, v4 @@ -104,20 +105,19 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; P9BE-NEXT: xxlor v6, vs0, vs0 ; P9BE-NEXT: vperm v7, v3, v6, v4 ; P9BE-NEXT: vperm v6, v3, v6, v2 -; 
P9BE-NEXT: add r7, r3, r4 ; P9BE-NEXT: vextuwlx r3, r5, v1 ; P9BE-NEXT: xvnegsp v6, v6 +; P9BE-NEXT: add r6, r3, r6 ; P9BE-NEXT: xvnegsp v1, v7 -; P9BE-NEXT: vabsduw v1, v1, v0 +; P9BE-NEXT: add r3, r7, r4 ; P9BE-NEXT: vabsduw v6, v6, v5 +; P9BE-NEXT: vabsduw v1, v1, v0 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxswapd v6, v1 -; P9BE-NEXT: add r6, r3, r6 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxspltw v6, v1, 1 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: vextuwlx r8, r5, v1 -; P9BE-NEXT: add r3, r7, r4 ; P9BE-NEXT: add r6, r8, r6 ; P9BE-NEXT: bdnz .LBB0_1 ; P9BE-NEXT: # %bb.2: # %for.cond.cleanup @@ -180,13 +180,14 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxsd v2, 0(r3) ; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-NEXT: lxsd v1, 0(r4) +; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-NEXT: lxvx v4, 0, r3 ; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l ; CHECK-NEXT: lxvx v0, 0, r3 -; CHECK-NEXT: lxsd v1, 0(r4) -; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: vperm v5, v3, v2, v4 ; CHECK-NEXT: vperm v2, v3, v2, v0 ; CHECK-NEXT: vperm v0, v3, v1, v0 @@ -198,7 +199,6 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc ; CHECK-NEXT: vadduwm v2, v2, v3 ; CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: vextuwrx r3, r3, v2 ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: blr @@ -207,6 +207,7 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: lfd f0, 0(r3) ; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; P9BE-NEXT: xxlxor v3, v3, v3 ; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; P9BE-NEXT: lxvx v4, 0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI1_1@toc@ha @@ -214,8 +215,8 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* 
noc ; P9BE-NEXT: xxlor v2, vs0, vs0 ; P9BE-NEXT: lfd f0, 0(r4) ; P9BE-NEXT: lxvx v0, 0, r3 -; P9BE-NEXT: xxlxor v3, v3, v3 ; P9BE-NEXT: xxlor v1, vs0, vs0 +; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vperm v5, v3, v2, v4 ; P9BE-NEXT: vperm v2, v3, v2, v0 ; P9BE-NEXT: vperm v0, v3, v1, v0 @@ -227,7 +228,6 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc ; P9BE-NEXT: vadduwm v2, v2, v3 ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 -; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuwlx r3, r3, v2 ; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: blr @@ -283,11 +283,11 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; CHECK-NEXT: add r5, r3, r4 ; CHECK-NEXT: lxsiwzx v2, r3, r4 ; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l ; CHECK-NEXT: lxvx v4, 0, r3 ; CHECK-NEXT: li r3, 4 ; CHECK-NEXT: lxsiwzx v5, r5, r3 -; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: vperm v2, v2, v3, v4 ; CHECK-NEXT: vperm v3, v5, v3, v4 ; CHECK-NEXT: vspltisw v4, 8 @@ -304,12 +304,12 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; P9BE-NEXT: add r5, r3, r4 ; P9BE-NEXT: lfiwzx f0, r3, r4 ; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; P9BE-NEXT: xxlxor v3, v3, v3 +; P9BE-NEXT: xxsldwi v2, f0, f0, 1 ; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l ; P9BE-NEXT: lxvx v4, 0, r3 ; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: xxsldwi v2, f0, f0, 1 ; P9BE-NEXT: lfiwzx f0, r5, r3 -; P9BE-NEXT: xxlxor v3, v3, v3 ; P9BE-NEXT: vperm v2, v3, v2, v4 ; P9BE-NEXT: xxsldwi v5, f0, f0, 1 ; P9BE-NEXT: vperm v3, v3, v5, v4 @@ -349,16 +349,16 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: add r6, r3, r4 ; CHECK-NEXT: li r7, 16 -; CHECK-NEXT: lxsihzx v2, r6, r7 +; CHECK-NEXT: add r6, r3, r4 ; CHECK-NEXT: lxsihzx v4, r3, r4 +; CHECK-NEXT: addis r3, r2, 
.LCPI3_0@toc@ha +; CHECK-NEXT: lxsihzx v2, r6, r7 ; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l ; CHECK-NEXT: mtvsrd v3, r6 ; CHECK-NEXT: vsplth v4, v4, 3 ; CHECK-NEXT: vsplth v2, v2, 3 -; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l ; CHECK-NEXT: vmrghh v4, v3, v4 ; CHECK-NEXT: vmrghh v2, v3, v2 ; CHECK-NEXT: vsplth v3, v3, 3 @@ -376,17 +376,17 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; P9BE-LABEL: test16: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: sldi r4, r4, 1 -; P9BE-NEXT: add r6, r3, r4 ; P9BE-NEXT: li r7, 16 -; P9BE-NEXT: lxsihzx v2, r6, r7 +; P9BE-NEXT: add r6, r3, r4 ; P9BE-NEXT: lxsihzx v4, r3, r4 +; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; P9BE-NEXT: lxsihzx v2, r6, r7 ; P9BE-NEXT: li r6, 0 +; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l ; P9BE-NEXT: sldi r6, r6, 48 ; P9BE-NEXT: vsplth v4, v4, 3 ; P9BE-NEXT: mtvsrd v3, r6 ; P9BE-NEXT: vsplth v2, v2, 3 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l ; P9BE-NEXT: vmrghh v4, v3, v4 ; P9BE-NEXT: vmrghh v2, v3, v2 ; P9BE-NEXT: vsplth v3, v3, 0 @@ -441,11 +441,11 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; CHECK-NEXT: mtvsrd v3, r3 ; CHECK-NEXT: li r3, 8 ; CHECK-NEXT: lxsibzx v5, r6, r3 +; CHECK-NEXT: vspltb v4, v3, 7 ; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l ; CHECK-NEXT: vspltb v2, v2, 7 +; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l ; CHECK-NEXT: vmrghb v2, v3, v2 -; CHECK-NEXT: vspltb v4, v3, 7 ; CHECK-NEXT: vspltb v5, v5, 7 ; CHECK-NEXT: vmrglh v2, v2, v4 ; CHECK-NEXT: vmrghb v3, v3, v5 @@ -466,9 +466,11 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add r6, r3, r4 ; P9BE-NEXT: li r7, 8 -; P9BE-NEXT: lxsibzx v2, r6, r7 ; P9BE-NEXT: lxsibzx v4, r3, r4 +; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P9BE-NEXT: lxsibzx 
v2, r6, r7 ; P9BE-NEXT: li r6, 0 +; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l ; P9BE-NEXT: sldi r6, r6, 56 ; P9BE-NEXT: vspltb v4, v4, 7 ; P9BE-NEXT: mtvsrd v3, r6 @@ -476,8 +478,6 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; P9BE-NEXT: vmrghb v4, v3, v4 ; P9BE-NEXT: vmrghb v2, v3, v2 ; P9BE-NEXT: vspltb v3, v3, 0 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l ; P9BE-NEXT: vmrghh v4, v4, v3 ; P9BE-NEXT: xxspltw v3, v3, 0 ; P9BE-NEXT: vmrghw v2, v4, v2 diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll index 7ceddd95e5735..e3894bcd23f5a 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -804,8 +804,8 @@ define double @foo3_fmf(double %a) nounwind { ; CHECK-P9-LABEL: foo3_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: addis 3, 2, .LCPI20_2@toc@ha -; CHECK-P9-NEXT: lfd 2, .LCPI20_2@toc@l(3) ; CHECK-P9-NEXT: xsabsdp 0, 1 +; CHECK-P9-NEXT: lfd 2, .LCPI20_2@toc@l(3) ; CHECK-P9-NEXT: xscmpudp 0, 0, 2 ; CHECK-P9-NEXT: xxlxor 0, 0, 0 ; CHECK-P9-NEXT: blt 0, .LBB20_2 @@ -899,8 +899,8 @@ define float @goo3_fmf(float %a) nounwind { ; CHECK-P9-LABEL: goo3_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: addis 3, 2, .LCPI22_2@toc@ha -; CHECK-P9-NEXT: lfs 2, .LCPI22_2@toc@l(3) ; CHECK-P9-NEXT: xsabsdp 0, 1 +; CHECK-P9-NEXT: lfs 2, .LCPI22_2@toc@l(3) ; CHECK-P9-NEXT: fcmpu 0, 0, 2 ; CHECK-P9-NEXT: xxlxor 0, 0, 0 ; CHECK-P9-NEXT: blt 0, .LBB22_2 diff --git a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll index 9ad5cb8864722..67e353257dd6a 100644 --- a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll +++ b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll @@ -28,69 +28,80 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-NEXT: .cfi_offset r31, -8 ; CHECK-NEXT: .cfi_offset r2, -152 ; 
CHECK-NEXT: lis 5, 4 +; CHECK-NEXT: std 30, 704(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 696(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 6, 5, 6292 +; CHECK-NEXT: std 28, 688(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 680(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 672(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 664(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 5, 5, 6291 +; CHECK-NEXT: std 14, 576(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, 592(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 600(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 608(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 616(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 624(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 632(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, 640(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 648(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 656(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 712(1) # 8-byte Folded Spill +; CHECK-NEXT: std 2, 568(1) # 8-byte Folded Spill ; CHECK-NEXT: sldi 6, 6, 32 ; CHECK-NEXT: oris 7, 6, 13030 ; CHECK-NEXT: oris 8, 6, 13066 -; CHECK-NEXT: ori 7, 7, 3704 ; CHECK-NEXT: oris 9, 6, 13054 +; CHECK-NEXT: oris 10, 6, 13042 +; CHECK-NEXT: oris 11, 6, 13078 +; CHECK-NEXT: oris 12, 6, 13115 +; CHECK-NEXT: oris 0, 6, 13103 +; CHECK-NEXT: oris 30, 6, 13091 +; CHECK-NEXT: oris 29, 6, 13127 +; CHECK-NEXT: oris 28, 6, 13164 +; CHECK-NEXT: oris 27, 6, 13152 +; CHECK-NEXT: oris 26, 6, 13139 +; CHECK-NEXT: oris 25, 6, 13176 +; CHECK-NEXT: ori 7, 7, 3704 ; CHECK-NEXT: ori 8, 8, 44408 ; CHECK-NEXT: ori 9, 9, 30840 -; CHECK-NEXT: add 7, 4, 7 -; CHECK-NEXT: oris 10, 6, 13042 ; CHECK-NEXT: ori 10, 10, 17272 -; CHECK-NEXT: std 7, 384(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 8 -; CHECK-NEXT: oris 11, 6, 13078 ; CHECK-NEXT: ori 11, 11, 57976 -; CHECK-NEXT: std 7, 376(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 9 -; CHECK-NEXT: oris 12, 6, 13115 ; CHECK-NEXT: ori 
12, 12, 33144 -; CHECK-NEXT: std 7, 368(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 10 -; CHECK-NEXT: oris 0, 6, 13103 ; CHECK-NEXT: ori 0, 0, 19576 -; CHECK-NEXT: std 7, 360(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 11 -; CHECK-NEXT: std 30, 704(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 30, 6, 13091 ; CHECK-NEXT: ori 30, 30, 6008 -; CHECK-NEXT: std 7, 352(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 12 -; CHECK-NEXT: std 29, 696(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 29, 6, 13127 ; CHECK-NEXT: ori 29, 29, 46712 +; CHECK-NEXT: ori 28, 28, 21880 +; CHECK-NEXT: ori 27, 27, 8312 +; CHECK-NEXT: ori 26, 26, 60280 +; CHECK-NEXT: ori 25, 25, 35448 +; CHECK-NEXT: add 7, 4, 7 ; CHECK-NEXT: sldi 5, 5, 32 ; CHECK-NEXT: oris 5, 5, 29347 ; CHECK-NEXT: ori 5, 5, 20088 +; CHECK-NEXT: std 7, 384(1) # 8-byte Folded Spill +; CHECK-NEXT: add 7, 4, 8 ; CHECK-NEXT: lis 8, 402 +; CHECK-NEXT: std 7, 376(1) # 8-byte Folded Spill +; CHECK-NEXT: add 7, 4, 9 ; CHECK-NEXT: lis 9, 451 +; CHECK-NEXT: std 7, 368(1) # 8-byte Folded Spill +; CHECK-NEXT: add 7, 4, 10 ; CHECK-NEXT: lis 10, 500 +; CHECK-NEXT: std 7, 360(1) # 8-byte Folded Spill +; CHECK-NEXT: add 7, 4, 11 ; CHECK-NEXT: lis 11, 549 -; CHECK-NEXT: std 31, 712(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 568(1) # 8-byte Folded Spill +; CHECK-NEXT: std 7, 352(1) # 8-byte Folded Spill +; CHECK-NEXT: add 7, 4, 12 ; CHECK-NEXT: std 7, 344(1) # 8-byte Folded Spill ; CHECK-NEXT: add 7, 4, 0 -; CHECK-NEXT: std 28, 688(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 28, 6, 13164 -; CHECK-NEXT: ori 28, 28, 21880 ; CHECK-NEXT: std 7, 336(1) # 8-byte Folded Spill ; CHECK-NEXT: add 7, 4, 30 -; CHECK-NEXT: std 27, 680(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 27, 6, 13152 -; CHECK-NEXT: ori 27, 27, 8312 ; CHECK-NEXT: std 7, 328(1) # 8-byte Folded Spill ; CHECK-NEXT: add 7, 4, 29 -; CHECK-NEXT: std 26, 672(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 26, 6, 13139 -; CHECK-NEXT: ori 26, 26, 60280 ; CHECK-NEXT: std 7, 
320(1) # 8-byte Folded Spill ; CHECK-NEXT: add 7, 4, 28 -; CHECK-NEXT: std 25, 664(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 25, 6, 13176 -; CHECK-NEXT: ori 25, 25, 35448 ; CHECK-NEXT: std 7, 312(1) # 8-byte Folded Spill ; CHECK-NEXT: add 7, 4, 27 ; CHECK-NEXT: std 7, 304(1) # 8-byte Folded Spill @@ -112,6 +123,10 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-NEXT: lis 5, 268 ; CHECK-NEXT: std 4, 256(1) # 8-byte Folded Spill ; CHECK-NEXT: lis 4, 585 +; CHECK-NEXT: std 6, 264(1) # 8-byte Folded Spill +; CHECK-NEXT: lis 6, 305 +; CHECK-NEXT: std 7, 272(1) # 8-byte Folded Spill +; CHECK-NEXT: lis 7, 354 ; CHECK-NEXT: ori 4, 4, 61440 ; CHECK-NEXT: std 4, 560(1) # 8-byte Folded Spill ; CHECK-NEXT: lis 4, 48 @@ -200,94 +215,79 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-NEXT: std 4, 192(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 36352 ; CHECK-NEXT: lis 5, 317 +; CHECK-NEXT: ld 30, 192(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 184(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 25088 ; CHECK-NEXT: lis 5, 366 +; CHECK-NEXT: ld 29, 184(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 176(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 13824 ; CHECK-NEXT: lis 5, 415 +; CHECK-NEXT: ld 28, 176(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 168(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 2560 ; CHECK-NEXT: lis 5, 463 +; CHECK-NEXT: ld 27, 168(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 160(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 56832 ; CHECK-NEXT: lis 5, 512 +; CHECK-NEXT: ld 26, 160(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 152(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 45568 ; CHECK-NEXT: lis 5, 561 +; CHECK-NEXT: ld 25, 152(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 144(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 34304 ; CHECK-NEXT: lis 5, 12 +; CHECK-NEXT: ld 24, 144(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 136(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 13568 ; CHECK-NEXT: lis 5, 61 +; 
CHECK-NEXT: ld 23, 136(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 128(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 2304 ; CHECK-NEXT: lis 5, 109 ; CHECK-NEXT: std 4, 120(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 56576 ; CHECK-NEXT: lis 5, 158 +; CHECK-NEXT: ld 0, 120(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 112(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 45312 ; CHECK-NEXT: lis 5, 207 +; CHECK-NEXT: ld 22, 112(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 34048 ; CHECK-NEXT: lis 5, 256 -; CHECK-NEXT: std 6, 264(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 6, 305 -; CHECK-NEXT: ld 30, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 0, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 104(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 96(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 5, 22784 -; CHECK-NEXT: std 7, 272(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 7, 354 +; CHECK-NEXT: ld 5, 248(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 96(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 88(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 6, 11520 ; CHECK-NEXT: ld 6, 240(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 88(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 80(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 7, 256 ; CHECK-NEXT: ld 7, 232(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 80(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 72(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 8, 54528 ; CHECK-NEXT: ld 8, 224(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 72(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 64(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 9, 43264 ; CHECK-NEXT: ld 9, 216(1) # 8-byte Folded Reload +; 
CHECK-NEXT: ld 16, 64(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 56(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 10, 32000 ; CHECK-NEXT: ld 10, 208(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 15, 56(1) # 8-byte Folded Reload ; CHECK-NEXT: std 4, 48(1) # 8-byte Folded Spill ; CHECK-NEXT: ori 4, 11, 20736 ; CHECK-NEXT: ld 11, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: std 14, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: std 16, 592(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 600(1) # 8-byte Folded Spill -; CHECK-NEXT: std 18, 608(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 616(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 624(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 632(1) # 8-byte Folded Spill -; CHECK-NEXT: std 22, 640(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 648(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 656(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 248(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 56(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 14, 48(1) # 8-byte Folded Reload +; CHECK-NEXT: std 4, 40(1) # 8-byte Folded Spill ; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: ld 31, 40(1) # 8-byte Folded Reload ; CHECK-NEXT: .p2align 4 @@ -305,6 +305,32 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-NEXT: stdux 3, 12, 2 ; CHECK-NEXT: ld 2, 552(1) # 8-byte Folded Reload ; CHECK-NEXT: stdx 3, 12, 5 +; CHECK-NEXT: stdx 3, 12, 6 +; CHECK-NEXT: stdx 3, 12, 7 +; CHECK-NEXT: stdx 3, 
12, 8 +; CHECK-NEXT: stdx 3, 12, 9 +; CHECK-NEXT: stdx 3, 12, 10 +; CHECK-NEXT: stdx 3, 12, 11 +; CHECK-NEXT: stdx 3, 12, 30 +; CHECK-NEXT: stdx 3, 12, 29 +; CHECK-NEXT: stdx 3, 12, 28 +; CHECK-NEXT: stdx 3, 12, 27 +; CHECK-NEXT: stdx 3, 12, 26 +; CHECK-NEXT: stdx 3, 12, 25 +; CHECK-NEXT: stdx 3, 12, 24 +; CHECK-NEXT: stdx 3, 12, 23 +; CHECK-NEXT: stdx 3, 12, 4 +; CHECK-NEXT: stdx 3, 12, 0 +; CHECK-NEXT: stdx 3, 12, 22 +; CHECK-NEXT: stdx 3, 12, 21 +; CHECK-NEXT: stdx 3, 12, 20 +; CHECK-NEXT: stdx 3, 12, 19 +; CHECK-NEXT: stdx 3, 12, 18 +; CHECK-NEXT: stdx 3, 12, 17 +; CHECK-NEXT: stdx 3, 12, 16 +; CHECK-NEXT: stdx 3, 12, 15 +; CHECK-NEXT: stdx 3, 12, 14 +; CHECK-NEXT: stdx 3, 12, 31 ; CHECK-NEXT: stdx 3, 12, 2 ; CHECK-NEXT: ld 2, 544(1) # 8-byte Folded Reload ; CHECK-NEXT: stdx 3, 12, 2 @@ -344,35 +370,11 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-NEXT: stdx 3, 12, 2 ; CHECK-NEXT: ld 2, 400(1) # 8-byte Folded Reload ; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: stdx 3, 12, 6 -; CHECK-NEXT: stdx 3, 12, 7 -; CHECK-NEXT: stdx 3, 12, 8 -; CHECK-NEXT: stdx 3, 12, 9 -; CHECK-NEXT: stdx 3, 12, 10 -; CHECK-NEXT: stdx 3, 12, 11 -; CHECK-NEXT: stdx 3, 12, 30 -; CHECK-NEXT: stdx 3, 12, 29 -; CHECK-NEXT: stdx 3, 12, 28 -; CHECK-NEXT: stdx 3, 12, 27 -; CHECK-NEXT: stdx 3, 12, 26 -; CHECK-NEXT: stdx 3, 12, 25 -; CHECK-NEXT: stdx 3, 12, 24 -; CHECK-NEXT: stdx 3, 12, 23 -; CHECK-NEXT: stdx 3, 12, 4 -; CHECK-NEXT: stdx 3, 12, 0 -; CHECK-NEXT: stdx 3, 12, 22 -; CHECK-NEXT: stdx 3, 12, 21 -; CHECK-NEXT: stdx 3, 12, 20 -; CHECK-NEXT: stdx 3, 12, 19 -; CHECK-NEXT: stdx 3, 12, 18 -; CHECK-NEXT: stdx 3, 12, 17 -; CHECK-NEXT: stdx 3, 12, 16 -; CHECK-NEXT: stdx 3, 12, 15 -; CHECK-NEXT: stdx 3, 12, 14 -; CHECK-NEXT: stdx 3, 12, 31 ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld 12, 384(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 4, 396(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 376(1) # 8-byte Folded 
Reload ; CHECK-NEXT: std 3, 0(12) @@ -399,8 +401,6 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-NEXT: ld 12, 288(1) # 8-byte Folded Reload ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 280(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 4, 396(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 272(1) # 8-byte Folded Reload ; CHECK-NEXT: std 3, 0(12) diff --git a/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll b/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll index 151f4a37615ec..ebe2d2f561466 100644 --- a/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll +++ b/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll @@ -40,8 +40,8 @@ define void @redundancy_on_ppc_and_other_targets() nounwind { ; PPC64LE-NEXT: std 0, 16(1) ; PPC64LE-NEXT: stdu 1, -32(1) ; PPC64LE-NEXT: addis 3, 2, .LC0@toc@ha -; PPC64LE-NEXT: ld 3, .LC0@toc@l(3) ; PPC64LE-NEXT: li 4, 0 +; PPC64LE-NEXT: ld 3, .LC0@toc@l(3) ; PPC64LE-NEXT: std 4, 0(3) ; PPC64LE-NEXT: bl barney.94 ; PPC64LE-NEXT: nop diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll index 3dc34533420c3..67262f472b32b 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -166,8 +166,8 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec ; P9LE-LABEL: s2v_test_f2: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 -; P9LE-NEXT: lxsiwzx v3, 0, r3 ; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: lxsiwzx v3, 0, r3 ; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr @@ -208,17 +208,17 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P9LE-LABEL: s2v_test_f3: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r7, 2 -; P9LE-NEXT: lxsiwzx v3, r3, r4 ; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: lxsiwzx v3, r3, r4 ; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: 
s2v_test_f3: ; P9BE: # %bb.0: # %entry ; P9BE: sldi r4, r7, 2 -; P9BE: lfiwzx f0, r3, r4 +; P9BE-DAG: lfiwzx f0, r3, r4 ; P9BE-DAG: xxspltw v2, v2, 1 -; P9BE-DAG: xxsldwi v3, f0, f0, 1 +; P9BE: xxsldwi v3, f0, f0, 1 ; P9BE: vmrghw v2, v3, v2 ; P9BE-NEXT: blr @@ -251,17 +251,17 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec ; P9LE-LABEL: s2v_test_f4: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 -; P9LE-NEXT: lxsiwzx v3, 0, r3 ; P9LE-NEXT: vmrglw v2, v2, v2 +; P9LE-NEXT: lxsiwzx v3, 0, r3 ; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f4: ; P9BE: # %bb.0: # %entry ; P9BE: addi r3, r3, 4 -; P9BE: lfiwzx f0, 0, r3 +; P9BE-DAG: lfiwzx f0, 0, r3 ; P9BE-DAG: xxspltw v2, v2, 1 -; P9BE-DAG: xxsldwi v3, f0, f0, 1 +; P9BE: xxsldwi v3, f0, f0, 1 ; P9BE: vmrghw v2, v3, v2 ; P9BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/sched-addi.ll b/llvm/test/CodeGen/PowerPC/sched-addi.ll index b49c337e45d15..19647b694a23c 100644 --- a/llvm/test/CodeGen/PowerPC/sched-addi.ll +++ b/llvm/test/CodeGen/PowerPC/sched-addi.ll @@ -18,9 +18,9 @@ define void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_a]* % ; CHECK-P9-NEXT: addi 6, 6, 16 ; CHECK-P9-NEXT: rldicr 5, 5, 0, 58 ; CHECK-P9-NEXT: addi 5, 5, -32 +; CHECK-P9-NEXT: lxvdsx 0, 0, 6 ; CHECK-P9-NEXT: rldicl 5, 5, 59, 5 ; CHECK-P9-NEXT: addi 5, 5, 1 -; CHECK-P9-NEXT: lxvdsx 0, 0, 6 ; CHECK-P9-NEXT: mtctr 5 ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB0_1: # %vector.body @@ -36,13 +36,13 @@ define void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_a]* % ; CHECK-P9-NEXT: xvmuldp 4, 4, 0 ; CHECK-P9-NEXT: xvmuldp 3, 3, 0 ; CHECK-P9-NEXT: xvmuldp 5, 5, 0 +; CHECK-P9-NEXT: addi 4, 4, 256 +; CHECK-P9-NEXT: xvmuldp 6, 6, 0 ; CHECK-P9-NEXT: stxv 1, 16(3) +; CHECK-P9-NEXT: stxv 2, 0(3) ; CHECK-P9-NEXT: stxv 3, 48(3) ; CHECK-P9-NEXT: stxv 4, 32(3) ; CHECK-P9-NEXT: stxv 5, 240(3) -; CHECK-P9-NEXT: addi 4, 4, 256 -; CHECK-P9-NEXT: xvmuldp 6, 6, 0 
-; CHECK-P9-NEXT: stxv 2, 0(3) ; CHECK-P9-NEXT: stxv 6, 224(3) ; CHECK-P9-NEXT: addi 3, 3, 256 ; CHECK-P9-NEXT: bdnz .LBB0_1 @@ -57,9 +57,9 @@ define void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_a]* % ; CHECK-P9-NO-HEURISTIC-NEXT: rldicr 5, 5, 0, 58 ; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, 16 ; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, -32 +; CHECK-P9-NO-HEURISTIC-NEXT: lxvdsx 0, 0, 6 ; CHECK-P9-NO-HEURISTIC-NEXT: rldicl 5, 5, 59, 5 ; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, 1 -; CHECK-P9-NO-HEURISTIC-NEXT: lxvdsx 0, 0, 6 ; CHECK-P9-NO-HEURISTIC-NEXT: mtctr 5 ; CHECK-P9-NO-HEURISTIC-NEXT: .p2align 4 ; CHECK-P9-NO-HEURISTIC-NEXT: .LBB0_1: # %vector.body @@ -76,13 +76,13 @@ define void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_a]* % ; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 3, 3, 0 ; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 6, 6, 0 ; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 5, 5, 0 +; CHECK-P9-NO-HEURISTIC-NEXT: addi 4, 4, 256 ; CHECK-P9-NO-HEURISTIC-NEXT: stxv 1, 16(3) ; CHECK-P9-NO-HEURISTIC-NEXT: stxv 2, 0(3) ; CHECK-P9-NO-HEURISTIC-NEXT: stxv 3, 48(3) ; CHECK-P9-NO-HEURISTIC-NEXT: stxv 4, 32(3) ; CHECK-P9-NO-HEURISTIC-NEXT: stxv 5, 240(3) ; CHECK-P9-NO-HEURISTIC-NEXT: stxv 6, 224(3) -; CHECK-P9-NO-HEURISTIC-NEXT: addi 4, 4, 256 ; CHECK-P9-NO-HEURISTIC-NEXT: addi 3, 3, 256 ; CHECK-P9-NO-HEURISTIC-NEXT: bdnz .LBB0_1 ; CHECK-P9-NO-HEURISTIC-NEXT: # %bb.2: # %return.block diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll index e427e81f40314..6605a1fd78cc4 100644 --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -22,10 +22,10 @@ define void @print_res() nounwind { ; CHECK-NEXT: isellt 3, 3, 4 ; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_6 ; CHECK-NEXT: # %bb.1: @@ -62,23 
+62,23 @@ define void @print_res() nounwind { ; CHECK-NEXT: add 4, 4, 6 ; CHECK-NEXT: .LBB0_6: ; CHECK-NEXT: xori 5, 5, 84 -; CHECK-NEXT: cntlzw 5, 5 ; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: li 7, 0 +; CHECK-NEXT: li 8, 3 ; CHECK-NEXT: std 3, 104(1) +; CHECK-NEXT: cntlzw 5, 5 ; CHECK-NEXT: addis 3, 2, .LC0@toc@ha +; CHECK-NEXT: li 10, 0 ; CHECK-NEXT: ld 3, .LC0@toc@l(3) -; CHECK-NEXT: li 7, 0 -; CHECK-NEXT: li 8, 3 ; CHECK-NEXT: srwi 5, 5, 5 ; CHECK-NEXT: add 4, 4, 5 ; CHECK-NEXT: li 5, 0 ; CHECK-NEXT: std 5, 120(1) ; CHECK-NEXT: li 5, 3 -; CHECK-NEXT: std 5, 96(1) ; CHECK-NEXT: clrldi 6, 4, 32 ; CHECK-NEXT: li 4, 3 +; CHECK-NEXT: std 5, 96(1) ; CHECK-NEXT: li 5, 0 -; CHECK-NEXT: li 10, 0 ; CHECK-NEXT: bl printf ; CHECK-NEXT: nop %1 = load i32, i32* undef, align 4 diff --git a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll index c462e18d9f939..5525a1975a7b7 100644 --- a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll +++ b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll @@ -7,8 +7,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr { ; CHECK: # %bb.0: ; CHECK-NEXT: lha 3, 0(3) ; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: sldi 5, 5, 62 ; CHECK-NEXT: lhz 4, 0(0) +; CHECK-NEXT: sldi 5, 5, 62 ; CHECK-NEXT: mtctr 5 ; CHECK-NEXT: srawi 3, 3, 1 ; CHECK-NEXT: addze 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-3.ll b/llvm/test/CodeGen/PowerPC/sms-phi-3.ll index cdb5100b29026..39e368a4611c1 100644 --- a/llvm/test/CodeGen/PowerPC/sms-phi-3.ll +++ b/llvm/test/CodeGen/PowerPC/sms-phi-3.ll @@ -21,9 +21,9 @@ define void @phi3(i32*) nounwind { ; CHECK-NEXT: nop ; CHECK-NEXT: addi 7, 30, -4 ; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: lwzu 8, 4(7) ; CHECK-NEXT: addi 4, 29, -8 ; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: lwzu 8, 4(7) ; CHECK-NEXT: bdz .LBB0_5 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: extswsli 6, 5, 5 diff --git a/llvm/test/CodeGen/PowerPC/sms-simple.ll b/llvm/test/CodeGen/PowerPC/sms-simple.ll index 1761b4ea533e2..d147079a9fb9f 100644 
--- a/llvm/test/CodeGen/PowerPC/sms-simple.ll +++ b/llvm/test/CodeGen/PowerPC/sms-simple.ll @@ -10,17 +10,17 @@ define dso_local i32* @foo() local_unnamed_addr { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r5, r2, x@toc@ha -; CHECK-NEXT: addi r5, r5, x@toc@l -; CHECK-NEXT: addi r5, r5, -8 ; CHECK-NEXT: addis r6, r2, y@toc@ha ; CHECK-NEXT: li r7, 340 +; CHECK-NEXT: addi r5, r5, x@toc@l +; CHECK-NEXT: addi r5, r5, -8 ; CHECK-NEXT: addi r3, r6, y@toc@l ; CHECK-NEXT: lwz r6, y@toc@l(r6) ; CHECK-NEXT: mtctr r7 +; CHECK-NEXT: addi r4, r3, -8 ; CHECK-NEXT: lwzu r7, 12(r5) ; CHECK-NEXT: maddld r6, r7, r7, r6 ; CHECK-NEXT: lwz r7, 4(r5) -; CHECK-NEXT: addi r4, r3, -8 ; CHECK-NEXT: stwu r6, 12(r4) ; CHECK-NEXT: maddld r6, r7, r7, r6 ; CHECK-NEXT: lwz r7, 8(r5) @@ -29,12 +29,12 @@ define dso_local i32* @foo() local_unnamed_addr { ; CHECK-NEXT: # ; CHECK-NEXT: maddld r7, r7, r7, r6 ; CHECK-NEXT: lwzu r8, 12(r5) -; CHECK-NEXT: maddld r8, r8, r8, r7 ; CHECK-NEXT: stw r6, 4(r4) ; CHECK-NEXT: lwz r6, 4(r5) -; CHECK-NEXT: maddld r6, r6, r6, r8 +; CHECK-NEXT: maddld r8, r8, r8, r7 ; CHECK-NEXT: stw r7, 8(r4) ; CHECK-NEXT: lwz r7, 8(r5) +; CHECK-NEXT: maddld r6, r6, r6, r8 ; CHECK-NEXT: stwu r8, 12(r4) ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll index 097ba07a5b1e7..e2808a4ae1418 100644 --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -12,8 +12,8 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE-LABEL: fold_srem_vec_1: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 @@ -26,9 +26,9 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: lis r4, 31710 ; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 +; 
P9LE-NEXT: ori r4, r4, 63421 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: ori r4, r4, 63421 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: sub r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 @@ -39,21 +39,21 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: lis r4, 21399 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: ori r4, r4, 33437 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: ori r4, r4, 33437 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 5 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: lis r4, -16728 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r4, -16728 ; P9LE-NEXT: ori r4, r4, 63249 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 @@ -69,8 +69,8 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9BE-LABEL: fold_srem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, 31710 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 63421 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -82,11 +82,11 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -21386 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -96,11 +96,12 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -16728 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 63249 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx 
r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 63249 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 @@ -109,12 +110,11 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 21399 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: ori r4, r4, 33437 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 33437 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 5 @@ -247,8 +247,8 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-LABEL: fold_srem_vec_2: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r5, r3, r4 @@ -272,6 +272,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r5, r3, r4 ; P9LE-NEXT: add r5, r5, r3 @@ -280,7 +281,6 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 @@ -300,8 +300,8 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9BE-LABEL: fold_srem_vec_2: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, -21386 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r5, r3, r4 @@ -327,6 +327,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, 
v4, v3 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 @@ -336,7 +337,6 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -468,8 +468,8 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-LABEL: combine_srem_sdiv: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r5, r3, r4 @@ -493,6 +493,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r7, r3 ; P9LE-NEXT: mulhw r8, r7, r4 ; P9LE-NEXT: add r7, r8, r7 @@ -501,7 +502,6 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 @@ -512,6 +512,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: srawi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r8 ; P9LE-NEXT: mulli r8, r4, 95 +; P9LE-NEXT: mtvsrd v5, r4 ; P9LE-NEXT: sub r3, r3, r8 ; P9LE-NEXT: mtvsrd v2, r3 ; P9LE-NEXT: vmrghh v2, v2, v4 @@ -520,7 +521,6 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v3, r5 ; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: mtvsrd v5, r4 ; P9LE-NEXT: vmrghh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 @@ -529,8 +529,8 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9BE-LABEL: combine_srem_sdiv: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r5, -21386 +; 
P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r5, r5, 37253 ; P9BE-NEXT: extsh r4, r3 ; P9BE-NEXT: mulhw r6, r4, r5 @@ -556,6 +556,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r7, r3 ; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 @@ -565,7 +566,6 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9BE-NEXT: mulli r8, r7, 95 ; P9BE-NEXT: sub r3, r3, r8 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -747,9 +747,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: lis r4, -21386 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 @@ -757,7 +758,6 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 @@ -791,11 +791,12 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -21386 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -804,7 +805,6 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, 
v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -914,8 +914,8 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE-LABEL: dont_fold_srem_one: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, -14230 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 30865 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 @@ -928,11 +928,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE-NEXT: lis r4, -19946 ; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: ori r4, r4, 17097 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v3, v4 ; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: ori r4, r4, 17097 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 @@ -940,12 +941,11 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: vmrghh v3, v3, v4 +; P9LE-NEXT: lis r4, 24749 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r4, 24749 ; P9LE-NEXT: ori r4, r4, 47143 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 @@ -961,8 +961,8 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9BE-LABEL: dont_fold_srem_one: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 17097 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -974,11 +974,11 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 24749 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 
47143 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 11 @@ -987,11 +987,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -14230 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 30865 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 30865 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -1003,7 +1004,6 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v2, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: vmrghh v2, v4, v2 ; P9BE-NEXT: vmrghw v2, v2, v3 @@ -1112,8 +1112,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-LABEL: dont_fold_urem_i16_smax: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 17097 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 @@ -1126,9 +1126,9 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-NEXT: lis r4, 24749 ; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: ori r4, r4, 47143 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: ori r4, r4, 47143 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 11 @@ -1138,6 +1138,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: srawi r4, r3, 15 ; P9LE-NEXT: addze r4, r4 @@ -1145,7 +1146,6 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-NEXT: sub r3, r3, r4 ; P9LE-NEXT: mtvsrd v2, r3 ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vmrghh v3, v4, v3 ; 
P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v3, v2 @@ -1154,8 +1154,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9BE-LABEL: dont_fold_urem_i16_smax: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 17097 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -1167,11 +1167,11 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 24749 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 11 @@ -1182,6 +1182,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: srawi r4, r3, 15 ; P9BE-NEXT: addze r4, r4 @@ -1191,7 +1192,6 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v2, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: vmrghh v2, v4, v2 ; P9BE-NEXT: vmrghw v2, v2, v3 @@ -1290,10 +1290,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P9LE-LABEL: dont_fold_srem_i64: ; P9LE: # %bb.0: ; P9LE-NEXT: lis r4, 24749 +; P9LE-NEXT: mfvsrd r3, v3 ; P9LE-NEXT: ori r4, r4, 47142 ; P9LE-NEXT: sldi r4, r4, 32 ; P9LE-NEXT: oris r4, r4, 58853 -; P9LE-NEXT: mfvsrd r3, v3 ; P9LE-NEXT: ori r4, r4, 6055 ; P9LE-NEXT: mulhd r4, r3, r4 ; P9LE-NEXT: rldicl r5, r4, 1, 63 @@ -1316,10 +1316,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P9LE-NEXT: sub r4, r4, r5 ; P9LE-NEXT: mtvsrdd v3, r3, r4 ; P9LE-NEXT: lis r4, 25653 +; P9LE-NEXT: mfvsrd r3, v2 
; P9LE-NEXT: ori r4, r4, 15432 ; P9LE-NEXT: sldi r4, r4, 32 ; P9LE-NEXT: oris r4, r4, 1603 -; P9LE-NEXT: mfvsrd r3, v2 ; P9LE-NEXT: ori r4, r4, 21445 ; P9LE-NEXT: mulhd r4, r3, r4 ; P9LE-NEXT: rldicl r5, r4, 1, 63 @@ -1334,10 +1334,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P9BE-LABEL: dont_fold_srem_i64: ; P9BE: # %bb.0: ; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: mfvsrld r3, v3 ; P9BE-NEXT: ori r4, r4, 47142 ; P9BE-NEXT: sldi r4, r4, 32 ; P9BE-NEXT: oris r4, r4, 58853 -; P9BE-NEXT: mfvsrld r3, v3 ; P9BE-NEXT: ori r4, r4, 6055 ; P9BE-NEXT: mulhd r4, r3, r4 ; P9BE-NEXT: rldicl r5, r4, 1, 63 @@ -1360,10 +1360,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P9BE-NEXT: sub r4, r4, r5 ; P9BE-NEXT: mtvsrdd v3, r4, r3 ; P9BE-NEXT: lis r4, 25653 +; P9BE-NEXT: mfvsrld r3, v2 ; P9BE-NEXT: ori r4, r4, 15432 ; P9BE-NEXT: sldi r4, r4, 32 ; P9BE-NEXT: oris r4, r4, 1603 -; P9BE-NEXT: mfvsrld r3, v2 ; P9BE-NEXT: ori r4, r4, 21445 ; P9BE-NEXT: mulhd r4, r3, r4 ; P9BE-NEXT: rldicl r5, r4, 1, 63 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll index 6c136e9a541c4..b475a2f7fbf1c 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll @@ -51,15 +51,15 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30 ; CHECK-P9-LE-NEXT: addi r3, r3, 15 +; CHECK-P9-LE-NEXT: li r6, -32768 +; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r4, r31, 48 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r5, r3 -; CHECK-P9-LE-NEXT: li r6, -32768 ; CHECK-P9-LE-NEXT: divd r7, r5, r6 -; CHECK-P9-LE-NEXT: mulld r6, r7, r6 -; CHECK-P9-LE-NEXT: mr r31, r1 -; CHECK-P9-LE-NEXT: addi r4, r31, 48 ; CHECK-P9-LE-NEXT: add r3, r1, r5 +; CHECK-P9-LE-NEXT: mulld r6, r7, r6 ; 
CHECK-P9-LE-NEXT: sub r5, r5, r6 ; CHECK-P9-LE-NEXT: stdux r4, r1, r5 ; CHECK-P9-LE-NEXT: cmpd r1, r3 @@ -69,8 +69,8 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin ; CHECK-P9-LE-NEXT: cmpd r1, r3 ; CHECK-P9-LE-NEXT: bne cr0, .LBB0_1 ; CHECK-P9-LE-NEXT: .LBB0_2: -; CHECK-P9-LE-NEXT: addi r3, r1, 32 ; CHECK-P9-LE-NEXT: li r4, 1 +; CHECK-P9-LE-NEXT: addi r3, r1, 32 ; CHECK-P9-LE-NEXT: stw r4, 4792(r3) ; CHECK-P9-LE-NEXT: lwz r3, 0(r3) ; CHECK-P9-LE-NEXT: ld r1, 0(r1) @@ -190,15 +190,15 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind { ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r4, r3, 2, 30 ; CHECK-P9-LE-NEXT: addi r4, r4, 15 +; CHECK-P9-LE-NEXT: li r7, -4096 +; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r5, r31, 48 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r4 -; CHECK-P9-LE-NEXT: li r7, -4096 ; CHECK-P9-LE-NEXT: divd r8, r6, r7 -; CHECK-P9-LE-NEXT: mulld r7, r8, r7 -; CHECK-P9-LE-NEXT: mr r31, r1 -; CHECK-P9-LE-NEXT: addi r5, r31, 48 ; CHECK-P9-LE-NEXT: add r4, r1, r6 +; CHECK-P9-LE-NEXT: mulld r7, r8, r7 ; CHECK-P9-LE-NEXT: sub r6, r6, r7 ; CHECK-P9-LE-NEXT: stdux r5, r1, r6 ; CHECK-P9-LE-NEXT: cmpd r1, r4 @@ -208,10 +208,10 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind { ; CHECK-P9-LE-NEXT: cmpd r1, r4 ; CHECK-P9-LE-NEXT: bne cr0, .LBB1_1 ; CHECK-P9-LE-NEXT: .LBB1_2: -; CHECK-P9-LE-NEXT: addi r4, r1, 32 ; CHECK-P9-LE-NEXT: extswsli r3, r3, 2 -; CHECK-P9-LE-NEXT: add r3, r4, r3 ; CHECK-P9-LE-NEXT: li r5, 1 +; CHECK-P9-LE-NEXT: addi r4, r1, 32 +; CHECK-P9-LE-NEXT: add r3, r4, r3 ; CHECK-P9-LE-NEXT: stw r5, 4096(r3) ; CHECK-P9-LE-NEXT: lwz r3, 0(r4) ; CHECK-P9-LE-NEXT: ld r1, 0(r1) @@ -334,16 +334,16 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30 ; CHECK-P9-LE-NEXT: addi r3, r3, 15 -; 
CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 -; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: lis r5, -1 ; CHECK-P9-LE-NEXT: ori r5, r5, 0 -; CHECK-P9-LE-NEXT: neg r6, r3 -; CHECK-P9-LE-NEXT: divd r7, r6, r5 -; CHECK-P9-LE-NEXT: mulld r7, r7, r5 ; CHECK-P9-LE-NEXT: mr r31, r1 ; CHECK-P9-LE-NEXT: addi r4, r31, 48 +; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 +; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 +; CHECK-P9-LE-NEXT: neg r6, r3 +; CHECK-P9-LE-NEXT: divd r7, r6, r5 ; CHECK-P9-LE-NEXT: add r3, r1, r6 +; CHECK-P9-LE-NEXT: mulld r7, r7, r5 ; CHECK-P9-LE-NEXT: sub r6, r6, r7 ; CHECK-P9-LE-NEXT: stdux r4, r1, r6 ; CHECK-P9-LE-NEXT: cmpd r1, r3 @@ -353,8 +353,8 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind ; CHECK-P9-LE-NEXT: cmpd r1, r3 ; CHECK-P9-LE-NEXT: bne cr0, .LBB2_1 ; CHECK-P9-LE-NEXT: .LBB2_2: -; CHECK-P9-LE-NEXT: addi r3, r1, 32 ; CHECK-P9-LE-NEXT: li r4, 1 +; CHECK-P9-LE-NEXT: addi r3, r1, 32 ; CHECK-P9-LE-NEXT: stw r4, 4792(r3) ; CHECK-P9-LE-NEXT: lwz r3, 0(r3) ; CHECK-P9-LE-NEXT: ld r1, 0(r1) diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll new file mode 100644 index 0000000000000..e595d8a732a5c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll @@ -0,0 +1,474 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck \ +; RUN: -check-prefix=CHECK-LE %s +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mtriple=powerpc64-linux-gnu < %s | FileCheck \ +; RUN: -check-prefix=CHECK-BE %s +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mtriple=powerpc-linux-gnu < %s | FileCheck \ +; RUN: -check-prefix=CHECK-32 %s + +; Free probe +define i8 @f0() #0 nounwind { +; CHECK-LE-LABEL: f0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 3 
+; CHECK-LE-NEXT: stb r3, -64(r1) +; CHECK-LE-NEXT: lbz r3, -64(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f0: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, -64(r1) +; CHECK-BE-NEXT: lbz r3, -64(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f0: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stwu r1, -80(r1) +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: addi r1, r1, 80 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 64 + %b = getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +define i8 @f1() #0 "stack-probe-size"="0" nounwind { +; CHECK-LE-LABEL: f1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: li r0, 259 +; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: .LBB1_1: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: bdnz .LBB1_1 +; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 48(r1) +; CHECK-LE-NEXT: lbz r3, 48(r1) +; CHECK-LE-NEXT: addi r1, r1, 4144 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: li r0, 260 +; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: .LBB1_1: # %entry +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: bdnz .LBB1_1 +; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 64(r1) +; CHECK-BE-NEXT: lbz r3, 64(r1) +; CHECK-BE-NEXT: addi r1, r1, 4160 +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: li r0, 257 +; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: .LBB1_1: # %entry +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: bdnz .LBB1_1 +; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r12 +; 
CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: addi r1, r1, 4112 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 4096 + %b = getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +define i8 @f2() #0 nounwind { +; CHECK-LE-LABEL: f2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) +; CHECK-LE-NEXT: li r0, 16 +; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: .LBB2_1: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: bdnz .LBB2_1 +; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 48(r1) +; CHECK-LE-NEXT: lbz r3, 48(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) +; CHECK-BE-NEXT: li r0, 16 +; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: .LBB2_1: # %entry +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: bdnz .LBB2_1 +; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 64(r1) +; CHECK-BE-NEXT: lbz r3, 64(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: li r0, 16 +; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: .LBB2_1: # %entry +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: bdnz .LBB2_1 +; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 65536 + %b = 
getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +define i8 @f3() #0 "stack-probe-size"="32768" nounwind { +; CHECK-LE-LABEL: f3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) +; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 48(r1) +; CHECK-LE-NEXT: lbz r3, 48(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) +; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 64(r1) +; CHECK-BE-NEXT: lbz r3, 64(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +; Same as f2, but without protection. 
+define i8 @f4() nounwind { +; CHECK-LE-LABEL: f4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lis r0, -2 +; CHECK-LE-NEXT: ori r0, r0, 65488 +; CHECK-LE-NEXT: stdux r1, r1, r0 +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 48(r1) +; CHECK-LE-NEXT: lbz r3, 48(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lis r0, -2 +; CHECK-BE-NEXT: ori r0, r0, 65472 +; CHECK-BE-NEXT: stdux r1, r1, r0 +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 64(r1) +; CHECK-BE-NEXT: lbz r3, 64(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis r0, -2 +; CHECK-32-NEXT: ori r0, r0, 65520 +; CHECK-32-NEXT: stwux r1, r1, r0 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +define i8 @f5() #0 "stack-probe-size"="65536" nounwind { +; CHECK-LE-LABEL: f5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) +; CHECK-LE-NEXT: li r0, 16 +; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: lis r0, -1 +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: .LBB5_1: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: bdnz .LBB5_1 +; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 48(r1) +; CHECK-LE-NEXT: lbz r3, 48(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) +; CHECK-BE-NEXT: li r0, 16 +; CHECK-BE-NEXT: mtctr r0 +; 
CHECK-BE-NEXT: lis r0, -1 +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: .LBB5_1: # %entry +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: bdnz .LBB5_1 +; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 64(r1) +; CHECK-BE-NEXT: lbz r3, 64(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f5: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: li r0, 16 +; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: nop +; CHECK-32-NEXT: .LBB5_1: # %entry +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: bdnz .LBB5_1 +; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 1048576 + %b = getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +define i8 @f6() #0 nounwind { +; CHECK-LE-LABEL: f6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) +; CHECK-LE-NEXT: lis r0, 4 +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: .LBB6_1: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: bdnz .LBB6_1 +; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 48(r1) +; CHECK-LE-NEXT: lbz r3, 48(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f6: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) +; CHECK-BE-NEXT: lis r0, 4 +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: .LBB6_1: # %entry +; CHECK-BE-NEXT: # +; 
CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: bdnz .LBB6_1 +; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 64(r1) +; CHECK-BE-NEXT: lbz r3, 64(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f6: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: lis r0, 4 +; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: .LBB6_1: # %entry +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: bdnz .LBB6_1 +; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: stb r3, 16(r1) +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lbz r3, 16(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 1073741824 + %b = getelementptr inbounds i8, i8* %a, i64 63 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +define i8 @f7() #0 "stack-probe-size"="65536" nounwind { +; CHECK-LE-LABEL: f7: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lis r0, -1 +; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: ori r0, r0, 13776 +; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: li r0, 15258 +; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: lis r0, -1 +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: .LBB7_1: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: bdnz .LBB7_1 +; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: stb r3, 41(r1) +; CHECK-LE-NEXT: lbz r3, 41(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lis r0, -1 +; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: ori r0, r0, 13760 +; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: li r0, 15258 +; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: 
lis r0, -1 +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: .LBB7_1: # %entry +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: bdnz .LBB7_1 +; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: li r3, 3 +; CHECK-BE-NEXT: stb r3, 57(r1) +; CHECK-BE-NEXT: lbz r3, 57(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: ori r0, r0, 13808 +; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: li r0, 15258 +; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: nop +; CHECK-32-NEXT: .LBB7_1: # %entry +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: bdnz .LBB7_1 +; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: stb r3, 9(r1) +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lbz r3, 9(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr +entry: + %a = alloca i8, i64 1000000007 + %b = getelementptr inbounds i8, i8* %a, i64 101 + store volatile i8 3, i8* %a + %c = load volatile i8, i8* %a + ret i8 %c +} + +attributes #0 = { "probe-stack"="inline-asm" } diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll index e595d8a732a5c..cb513be9128cc 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -41,10 +41,11 @@ entry: ret i8 %c } -define i8 @f1() #0 "stack-probe-size"="0" nounwind { +define i8 @f1() #0 "stack-probe-size"="0" { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-LE-NEXT: li r0, 259 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: .LBB1_1: # %entry @@ -52,6 +53,8 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; 
CHECK-LE-NEXT: stdu r12, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 4144 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -61,6 +64,7 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-BE-NEXT: li r0, 260 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: .LBB1_1: # %entry @@ -68,6 +72,8 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-BE-NEXT: stdu r12, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 4160 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -77,6 +83,7 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ; CHECK-32-NEXT: li r0, 257 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: .LBB1_1: # %entry @@ -84,10 +91,12 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 -; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 4112 +; CHECK-32-NEXT: li r3, 3 +; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: lbz r3, 16(r1) ; CHECK-32-NEXT: addi r1, r1, 4112 ; CHECK-32-NEXT: blr @@ -99,10 +108,11 @@ entry: ret i8 %c } -define i8 @f2() #0 nounwind { +define i8 @f2() #0 { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r0, 16 ; CHECK-LE-NEXT: 
mtctr r0 @@ -111,6 +121,8 @@ define i8 @f2() #0 nounwind { ; CHECK-LE-NEXT: stdu r12, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -120,6 +132,7 @@ define i8 @f2() #0 nounwind { ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r0, 16 ; CHECK-BE-NEXT: mtctr r0 @@ -128,6 +141,8 @@ define i8 @f2() #0 nounwind { ; CHECK-BE-NEXT: stdu r12, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -137,6 +152,7 @@ define i8 @f2() #0 nounwind { ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: li r0, 16 ; CHECK-32-NEXT: mtctr r0 @@ -145,9 +161,11 @@ define i8 @f2() #0 nounwind { ; CHECK-32-NEXT: stwu r12, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 -; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 +; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: mr r0, r31 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -163,13 +181,16 @@ entry: ret i8 %c } -define i8 @f3() #0 "stack-probe-size"="32768" nounwind { +define i8 @f3() #0 "stack-probe-size"="32768" { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: stdu r12, -32768(r1) ; CHECK-LE-NEXT: stdu r12, 
-32768(r1) +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -179,9 +200,12 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: stdu r12, -32768(r1) ; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -191,12 +215,15 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: stwu r12, -32768(r1) ; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 -; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 +; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: mr r0, r31 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -213,12 +240,13 @@ entry: } ; Same as f2, but without protection. 
-define i8 @f4() nounwind { +define i8 @f4() { ; CHECK-LE-LABEL: f4: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: lis r0, -2 ; CHECK-LE-NEXT: ori r0, r0, 65488 ; CHECK-LE-NEXT: stdux r1, r1, r0 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -230,6 +258,7 @@ define i8 @f4() nounwind { ; CHECK-BE-NEXT: lis r0, -2 ; CHECK-BE-NEXT: ori r0, r0, 65472 ; CHECK-BE-NEXT: stdux r1, r1, r0 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -241,8 +270,9 @@ define i8 @f4() nounwind { ; CHECK-32-NEXT: lis r0, -2 ; CHECK-32-NEXT: ori r0, r0, 65520 ; CHECK-32-NEXT: stwux r1, r1, r0 -; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 +; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: mr r0, r31 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -258,10 +288,11 @@ entry: ret i8 %c } -define i8 @f5() #0 "stack-probe-size"="65536" nounwind { +define i8 @f5() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r0, 16 ; CHECK-LE-NEXT: mtctr r0 @@ -272,6 +303,8 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 1048624 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -281,6 +314,7 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r0, 16 ; CHECK-BE-NEXT: mtctr r0 @@ -291,6 +325,8 @@ define i8 
@f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 1048640 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -300,6 +336,7 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: li r0, 16 ; CHECK-32-NEXT: mtctr r0 @@ -310,9 +347,11 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 -; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 1048592 +; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: mr r0, r31 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -328,10 +367,11 @@ entry: ret i8 %c } -define i8 @f6() #0 nounwind { +define i8 @f6() #0 { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: lis r0, 4 ; CHECK-LE-NEXT: nop @@ -341,6 +381,8 @@ define i8 @f6() #0 nounwind { ; CHECK-LE-NEXT: stdu r12, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 1073741872 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -350,6 +392,7 @@ define i8 @f6() #0 nounwind { ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 ; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: lis r0, 4 ; CHECK-BE-NEXT: nop @@ -359,6 +402,8 @@ define i8 
@f6() #0 nounwind { ; CHECK-BE-NEXT: stdu r12, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 1073741888 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -368,6 +413,7 @@ define i8 @f6() #0 nounwind { ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: lis r0, 4 ; CHECK-32-NEXT: nop @@ -377,9 +423,11 @@ define i8 @f6() #0 nounwind { ; CHECK-32-NEXT: stwu r12, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 -; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 1073741840 +; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: mr r0, r31 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -395,11 +443,12 @@ entry: ret i8 %c } -define i8 @f7() #0 "stack-probe-size"="65536" nounwind { +define i8 @f7() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lis r0, -1 ; CHECK-LE-NEXT: mr r12, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 +; CHECK-LE-NEXT: lis r0, -1 ; CHECK-LE-NEXT: ori r0, r0, 13776 ; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: li r0, 15258 @@ -411,6 +460,8 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 1000000048 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 41(r1) ; CHECK-LE-NEXT: lbz r3, 41(r1) @@ -419,8 +470,9 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lis r0, -1 ; CHECK-BE-NEXT: mr r12, r1 +; CHECK-BE-NEXT: 
.cfi_def_cfa r12, 0 +; CHECK-BE-NEXT: lis r0, -1 ; CHECK-BE-NEXT: ori r0, r0, 13760 ; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: li r0, 15258 @@ -432,6 +484,8 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 1000000064 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 57(r1) ; CHECK-BE-NEXT: lbz r3, 57(r1) @@ -440,8 +494,9 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lis r0, -1 ; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 +; CHECK-32-NEXT: lis r0, -1 ; CHECK-32-NEXT: ori r0, r0, 13808 ; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: li r0, 15258 @@ -453,9 +508,11 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 -; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_offset 1000000016 +; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: stb r3, 9(r1) ; CHECK-32-NEXT: mr r0, r31 ; CHECK-32-NEXT: lbz r3, 9(r1) diff --git a/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir b/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir new file mode 100644 index 0000000000000..49211342d1306 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir @@ -0,0 +1,18 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -run-pass=postmisched -o - %s | FileCheck %s +--- +# Check that postmisched's TopDepthReduce heuristic moves the MULLD later +# because of the dependency on x5 +name: test +body: | + bb.0: + ; CHECK-LABEL: name: test + ; CHECK: renamable $x5 = LD 0, 
killed renamable $x5 :: (load 8) + ; CHECK: renamable $x4 = LD 0, killed renamable $x4 :: (load 8) + ; CHECK: renamable $x5 = MULLD killed renamable $x5, renamable $x3 + ; CHECK: renamable $x3 = MADDLD8 killed renamable $x4, killed renamable $x3, killed renamable $x5 + renamable $x5 = LD 0, killed renamable $x5 :: (load 8) + renamable $x5 = MULLD killed renamable $x5, renamable $x3 + renamable $x4 = LD 0, killed renamable $x4 :: (load 8) + renamable $x3 = MADDLD8 killed renamable $x4, killed renamable $x3, killed renamable $x5 +... diff --git a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll index 2020833fd897f..c04f0ff35f705 100644 --- a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll +++ b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll @@ -20,9 +20,9 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: xscvuxddp f0, f0 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: xscvuxddp f0, f0 ; P9BE-NEXT: xscvuxddp f1, f1 ; P9BE-NEXT: xxmrghd v2, vs0, vs1 ; P9BE-NEXT: blr @@ -35,9 +35,9 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: xscvuxddp f0, f0 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mtfprwz f1, r3 -; P9LE-NEXT: xscvuxddp f0, f0 ; P9LE-NEXT: xscvuxddp f1, f1 ; P9LE-NEXT: xxmrghd v2, vs1, vs0 ; P9LE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll b/llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll index 6fa8be9650b61..73ba4c93ac8ab 100644 --- a/llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll +++ b/llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll @@ -6,8 +6,8 @@ define i8 @test_xaddr(i8* %p) { ; CHECK-LABEL: test_xaddr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: ori r4, r4, 40000 ; CHECK-NEXT: std r3, -8(r1) +; CHECK-NEXT: ori 
r4, r4, 40000 ; CHECK-NEXT: lbzx r3, r3, r4 ; CHECK-NEXT: blr entry: @@ -56,8 +56,8 @@ define void @test_xoaddr(i32* %arr, i32* %arrTo) { ; CHECK-LABEL: test_xoaddr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi r3, r3, 8 -; CHECK-NEXT: lxvx vs0, 0, r3 ; CHECK-NEXT: addi r4, r4, 4 +; CHECK-NEXT: lxvx vs0, 0, r3 ; CHECK-NEXT: stxvx vs0, 0, r4 ; CHECK-NEXT: blr entry: @@ -77,9 +77,9 @@ define i64 @test_xaddrX4_loop(i8* %p) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi r4, r3, -8 ; CHECK-NEXT: li r3, 8 +; CHECK-NEXT: li r5, 3 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r5, 3 ; loop instruction number is changed from 5 to 4, so its align is changed from 5 to 4. ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_1: # %for.body diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll index 4bb3730aa0437..f889fad8df6ce 100644 --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -12,9 +12,11 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-LABEL: fold_urem_vec_1: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, 21399 +; P9LE-NEXT: lis r5, 8456 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 33437 +; P9LE-NEXT: ori r5, r5, 16913 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 5 @@ -23,9 +25,9 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: lis r4, 16727 ; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: ori r4, r4, 2287 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: ori r4, r4, 2287 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 8 ; P9LE-NEXT: mulli r4, r4, 1003 @@ -33,8 +35,6 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, 8456 -; P9LE-NEXT: ori r5, r5, 16913 ; P9LE-NEXT: 
vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: rlwinm r3, r3, 30, 18, 31 @@ -45,9 +45,9 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: lis r4, 22765 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: sub r5, r3, r4 ; P9LE-NEXT: srwi r5, r5, 1 @@ -63,9 +63,11 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE-LABEL: fold_urem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, 16727 +; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 2287 +; P9BE-NEXT: ori r5, r5, 16913 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 8 @@ -73,11 +75,11 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 21399 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 33437 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: ori r4, r4, 33437 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 5 ; P9BE-NEXT: mulli r4, r4, 98 @@ -86,8 +88,6 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: lis r5, 8456 -; P9BE-NEXT: ori r5, r5, 16913 ; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 @@ -97,11 +97,11 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r4, r3 ; P9BE-NEXT: lis r4, 22765 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: sub r5, r3, r4 ; P9BE-NEXT: srwi r5, 
r5, 1 @@ -223,8 +223,8 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-LABEL: fold_urem_vec_2: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r5, r3, r4 @@ -248,6 +248,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r5, r3, r4 ; P9LE-NEXT: sub r6, r3, r5 @@ -256,7 +257,6 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 @@ -276,8 +276,8 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE-LABEL: fold_urem_vec_2: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, 22765 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r5, r3, r4 @@ -303,6 +303,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r5, r3, r4 ; P9BE-NEXT: sub r6, r3, r5 @@ -312,7 +313,6 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -444,8 +444,8 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-LABEL: combine_urem_udiv: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; 
P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r5, r3, r4 @@ -469,6 +469,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r7, r3, 16 ; P9LE-NEXT: mulhwu r8, r7, r4 ; P9LE-NEXT: sub r7, r7, r8 @@ -477,7 +478,6 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: srwi r7, r7, 6 ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 @@ -488,6 +488,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: add r4, r8, r4 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r8, r4, 95 +; P9LE-NEXT: mtvsrd v5, r4 ; P9LE-NEXT: sub r3, r3, r8 ; P9LE-NEXT: mtvsrd v2, r3 ; P9LE-NEXT: vmrghh v2, v2, v4 @@ -496,7 +497,6 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v3, r5 ; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: mtvsrd v5, r4 ; P9LE-NEXT: vmrghh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 @@ -505,8 +505,8 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE-LABEL: combine_urem_udiv: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r5, 22765 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r5, r5, 8969 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: mulhwu r6, r4, r5 @@ -532,6 +532,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r7, r3, 16 ; P9BE-NEXT: mulhwu r8, r7, r5 ; P9BE-NEXT: sub r7, r7, r8 @@ -541,7 +542,6 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE-NEXT: mulli r8, r7, 95 ; P9BE-NEXT: sub r3, r3, r8 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: 
mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -708,7 +708,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE-LABEL: dont_fold_urem_power_of_two: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: lis r4, 22765 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: clrlwi r3, r3, 26 ; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 @@ -717,8 +719,6 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r4, 22765 -; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 @@ -740,7 +740,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9BE-LABEL: dont_fold_urem_power_of_two: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: lis r4, 22765 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: clrlwi r3, r3, 27 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v3, r3 @@ -751,8 +753,6 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: lis r4, 22765 -; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 @@ -844,9 +844,11 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE-LABEL: dont_fold_urem_one: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: ori r5, r5, 30865 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 4 @@ -855,9 +857,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE-NEXT: lis r4, 24749 ; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: ori r4, r4, 47143 ; P9LE-NEXT: 
vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: ori r4, r4, 47143 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 11 ; P9LE-NEXT: mulli r4, r4, 5423 @@ -865,8 +867,6 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: lis r5, -14230 -; P9LE-NEXT: ori r5, r5, 30865 ; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: rlwinm r3, r3, 31, 17, 31 @@ -884,9 +884,11 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9BE-LABEL: dont_fold_urem_one: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: lis r5, -14230 +; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 47143 +; P9BE-NEXT: ori r5, r5, 30865 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 11 @@ -894,11 +896,11 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -19946 ; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: ori r4, r4, 17097 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: ori r4, r4, 17097 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: mulli r4, r4, 23 @@ -907,8 +909,6 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: lis r5, -14230 -; P9BE-NEXT: ori r5, r5, 30865 ; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 @@ -1023,10 +1023,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P9LE-LABEL: dont_fold_urem_i64: ; P9LE: # %bb.0: ; P9LE-NEXT: lis r4, 25644 +; P9LE-NEXT: mfvsrld r3, v3 ; P9LE-NEXT: ori r4, r4, 34192 ; P9LE-NEXT: sldi r4, r4, 32 ; P9LE-NEXT: oris r4, r4, 45590 -; P9LE-NEXT: mfvsrld r3, v3 ; P9LE-NEXT: ori r4, r4, 17097 ; 
P9LE-NEXT: mulhdu r4, r3, r4 ; P9LE-NEXT: sub r5, r3, r4 @@ -1047,9 +1047,9 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P9LE-NEXT: sub r4, r4, r5 ; P9LE-NEXT: lis r5, 25653 ; P9LE-NEXT: ori r5, r5, 15432 -; P9LE-NEXT: sldi r5, r5, 32 ; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: mfvsrd r3, v2 +; P9LE-NEXT: sldi r5, r5, 32 ; P9LE-NEXT: rldicl r4, r3, 63, 1 ; P9LE-NEXT: oris r5, r5, 1603 ; P9LE-NEXT: ori r5, r5, 21445 @@ -1064,10 +1064,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P9BE-LABEL: dont_fold_urem_i64: ; P9BE: # %bb.0: ; P9BE-NEXT: lis r4, 25644 +; P9BE-NEXT: mfvsrd r3, v3 ; P9BE-NEXT: ori r4, r4, 34192 ; P9BE-NEXT: sldi r4, r4, 32 ; P9BE-NEXT: oris r4, r4, 45590 -; P9BE-NEXT: mfvsrd r3, v3 ; P9BE-NEXT: ori r4, r4, 17097 ; P9BE-NEXT: mulhdu r4, r3, r4 ; P9BE-NEXT: sub r5, r3, r4 @@ -1075,8 +1075,8 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P9BE-NEXT: add r4, r5, r4 ; P9BE-NEXT: lis r5, -16037 ; P9BE-NEXT: rldicl r4, r4, 60, 4 -; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: ori r5, r5, 28749 +; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sldi r5, r5, 32 ; P9BE-NEXT: oris r5, r5, 52170 ; P9BE-NEXT: ori r5, r5, 12109 @@ -1088,9 +1088,9 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P9BE-NEXT: sub r4, r4, r5 ; P9BE-NEXT: lis r5, 25653 ; P9BE-NEXT: ori r5, r5, 15432 -; P9BE-NEXT: sldi r5, r5, 32 ; P9BE-NEXT: mtvsrdd v3, r3, r4 ; P9BE-NEXT: mfvsrld r3, v2 +; P9BE-NEXT: sldi r5, r5, 32 ; P9BE-NEXT: rldicl r4, r3, 63, 1 ; P9BE-NEXT: oris r5, r5, 1603 ; P9BE-NEXT: ori r5, r5, 21445 diff --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll index 735b39da8056a..6a1ba7b95399a 100644 --- a/llvm/test/CodeGen/PowerPC/vavg.ll +++ b/llvm/test/CodeGen/PowerPC/vavg.ll @@ -138,8 +138,8 @@ define <8 x i16> @test_v8i16_sign_negative(<8 x i16> %m, <8 x i16> %n) { ; CHECK-P9-LABEL: test_v8i16_sign_negative: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; CHECK-P9-NEXT: addi 3, 3, 
.LCPI6_0@toc@l ; CHECK-P9-NEXT: vadduhm 2, 2, 3 +; CHECK-P9-NEXT: addi 3, 3, .LCPI6_0@toc@l ; CHECK-P9-NEXT: lxvx 35, 0, 3 ; CHECK-P9-NEXT: vadduhm 2, 2, 3 ; CHECK-P9-NEXT: vspltish 3, 1 diff --git a/llvm/test/CodeGen/PowerPC/vec-bswap.ll b/llvm/test/CodeGen/PowerPC/vec-bswap.ll index e6864d8250d26..7ff5b97780b2b 100644 --- a/llvm/test/CodeGen/PowerPC/vec-bswap.ll +++ b/llvm/test/CodeGen/PowerPC/vec-bswap.ll @@ -3,7 +3,8 @@ define dso_local void @test(i32* %Arr, i32 signext %Len) { ; CHECK-LABEL: test: ; CHECK: lxvx [[REG:vs[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}} -; CHECK-NEXT: xxbrw vs{{[0-9]+}}, [[REG]] +; CHECK-NOT: [[REG]] +; CHECK: xxbrw vs{{[0-9]+}}, [[REG]] entry: %cmp1 = icmp slt i32 0, %Len br i1 %cmp1, label %for.body.lr.ph, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll index 48b62f57c1c9e..ecf02feff826d 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -34,9 +34,9 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 @@ -219,10 +219,10 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: mtvsrd v2, r3 @@ -270,10 +270,10 @@ define <8 x i16> @test8elt(<8 x 
float>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 48 @@ -298,14 +298,14 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 @@ -438,19 +438,20 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-P9-NEXT: xxswapd vs4, vs2 +; CHECK-P9-NEXT: xscvspdpn f5, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvspdpn f5, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs1, 16(r4) 
-; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-P9-NEXT: xxswapd vs3, vs1 ; CHECK-P9-NEXT: mtvsrd v2, r5 ; CHECK-P9-NEXT: mffprwz r5, f4 @@ -458,6 +459,7 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: mtvsrd v3, r5 ; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvspdpn f4, vs6 ; CHECK-P9-NEXT: mtvsrd v3, r5 @@ -465,15 +467,13 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 @@ -506,6 +506,7 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: vmrghh v2, v4, v2 @@ -532,31 +533,31 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r4) ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; 
CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v2, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f4 -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r5, f3 @@ -564,7 +565,6 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r5 @@ -591,24 +591,24 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: lxv vs0, 32(r4) ; CHECK-BE-NEXT: xscvspdpn f5, vs1 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: vmrghh v5, v5, v0 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, 
vs1 +; CHECK-BE-NEXT: vmrghh v5, v5, v0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v3, r4 @@ -618,18 +618,18 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v3, r4 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 @@ -682,9 +682,9 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 @@ -867,10 +867,10 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; 
CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: mtvsrd v2, r3 @@ -918,10 +918,10 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 48 @@ -946,14 +946,14 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 @@ -1086,19 +1086,20 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-P9-NEXT: xxswapd vs4, vs2 +; CHECK-P9-NEXT: xscvspdpn f5, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, 
vs2, 1 +; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvspdpn f5, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-P9-NEXT: xxswapd vs3, vs1 ; CHECK-P9-NEXT: mtvsrd v2, r5 ; CHECK-P9-NEXT: mffprwz r5, f4 @@ -1106,6 +1107,7 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: mtvsrd v3, r5 ; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvspdpn f4, vs6 ; CHECK-P9-NEXT: mtvsrd v3, r5 @@ -1113,15 +1115,13 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 @@ -1154,6 +1154,7 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: vmrghh v2, v4, v2 @@ -1180,31 +1181,31 @@ define 
void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r4) ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v2, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f4 -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r5, f3 @@ -1212,7 +1213,6 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r5 @@ -1239,24 +1239,24 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: lxv vs0, 32(r4) ; CHECK-BE-NEXT: xscvspdpn f5, vs1 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; 
CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: vmrghh v5, v5, v0 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: vmrghh v5, v5, v0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v3, r4 @@ -1266,18 +1266,18 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v3, r4 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll 
b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll index 928a19f3a55c9..c7965d6c3e091 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -37,9 +37,9 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 @@ -230,10 +230,10 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: mtvsrd v2, r3 @@ -282,10 +282,10 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -310,14 +310,14 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; 
CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 @@ -451,12 +451,12 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: mtvsrd v2, r3 @@ -550,12 +550,12 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -580,14 +580,14 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v3, 
v3, v4 ; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 @@ -606,15 +606,15 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 @@ -633,14 +633,14 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 ; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 @@ -695,9 +695,9 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi 
vs1, v2, v2, 3 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 @@ -888,10 +888,10 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: mtvsrd v2, r3 @@ -940,10 +940,10 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -968,14 +968,14 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 @@ -1109,12 +1109,12 @@ define <16 x i8> @test16elt_signed(<16 x 
float>* nocapture readonly) local_unnam ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: mtvsrd v2, r3 @@ -1208,12 +1208,12 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1238,14 +1238,14 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 @@ -1264,15 +1264,15 @@ define <16 x i8> @test16elt_signed(<16 x 
float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 @@ -1291,14 +1291,14 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 ; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll index dbc2774fed8cb..0e0a3240f471f 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -89,10 +89,10 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: 
xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 @@ -113,10 +113,10 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -194,12 +194,12 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 @@ -237,12 +237,12 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -387,18 +387,20 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-P9-NEXT: lxv 
vs3, 0(r4) ; CHECK-P9-NEXT: lxv vs2, 16(r4) ; CHECK-P9-NEXT: lxv vs1, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xscvdpsxws f5, f2 ; CHECK-P9-NEXT: xscvdpsxws f6, f1 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f7, f0 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: mtvsrd v2, r5 ; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: mtvsrd v3, r5 @@ -408,8 +410,6 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: lxv vs2, 80(r4) @@ -469,30 +469,30 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs4, 48(r4) +; CHECK-BE-NEXT: lxv vs3, 32(r4) +; CHECK-BE-NEXT: lxv vs2, 16(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: lxv vs3, 32(r4) ; CHECK-BE-NEXT: xscvdpsxws f6, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f7, f2 +; CHECK-BE-NEXT: lxv vs0, 112(r4) ; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: 
xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v2, r5 ; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: xscvdpsxws f4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: mffprwz r5, f6 ; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: lxv vs0, 112(r4) ; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 @@ -524,12 +524,15 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-NEXT: vmrghh v4, v4, v1 ; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghh v5, v5, v1 ; CHECK-BE-NEXT: mffprwz r5, f0 ; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: vmrghh v5, v5, v1 +; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 +; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: vmrghh v0, v0, v1 ; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 @@ -537,10 +540,12 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: stxv vs3, 0(r3) ; CHECK-BE-NEXT: mtvsrd v3, r4 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: vmrghw v2, v2, v0 ; CHECK-BE-NEXT: mtvsrd v3, r4 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 @@ -553,11 +558,6 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; 
CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: vmrghh v0, v0, v1 -; CHECK-BE-NEXT: vmrghw v2, v2, v0 -; CHECK-BE-NEXT: stxv vs3, 0(r3) ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v5, r4 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 @@ -652,10 +652,10 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 @@ -676,10 +676,10 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -757,12 +757,12 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 @@ -800,12 +800,12 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: 
xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -950,18 +950,20 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: lxv vs3, 0(r4) ; CHECK-P9-NEXT: lxv vs2, 16(r4) ; CHECK-P9-NEXT: lxv vs1, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xscvdpsxws f5, f2 ; CHECK-P9-NEXT: xscvdpsxws f6, f1 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f7, f0 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: mtvsrd v2, r5 ; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: mtvsrd v3, r5 @@ -971,8 +973,6 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: lxv vs2, 80(r4) @@ -1032,30 +1032,30 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs4, 48(r4) +; CHECK-BE-NEXT: lxv vs3, 32(r4) +; CHECK-BE-NEXT: lxv vs2, 16(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: 
xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: lxv vs3, 32(r4) ; CHECK-BE-NEXT: xscvdpsxws f6, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f7, f2 +; CHECK-BE-NEXT: lxv vs0, 112(r4) ; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v2, r5 ; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: xscvdpsxws f4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: mffprwz r5, f6 ; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: lxv vs0, 112(r4) ; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 @@ -1087,12 +1087,15 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: vmrghh v4, v4, v1 ; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghh v5, v5, v1 ; CHECK-BE-NEXT: mffprwz r5, f0 ; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: vmrghh v5, v5, v1 +; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 +; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: vmrghh v0, v0, v1 ; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 @@ -1100,10 +1103,12 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; 
CHECK-BE-NEXT: stxv vs3, 0(r3) ; CHECK-BE-NEXT: mtvsrd v3, r4 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: vmrghw v2, v2, v0 ; CHECK-BE-NEXT: mtvsrd v3, r4 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 @@ -1116,11 +1121,6 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: vmrghh v0, v0, v1 -; CHECK-BE-NEXT: vmrghw v2, v2, v0 -; CHECK-BE-NEXT: stxv vs3, 0(r3) ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v5, r4 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll index 173ced964ad62..c0d2dd35aeb85 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -129,10 +129,10 @@ define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x double ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs2, 0(r4) ; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2 -; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: lxv vs1, 48(r4) +; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2 +; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-P9-NEXT: xvcvdpuxws v2, vs4 ; CHECK-P9-NEXT: xvcvdpuxws v3, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0 @@ -149,10 +149,10 @@ define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x double ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) ; CHECK-BE-NEXT: lxv vs3, 0(r4) -; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2 -; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-BE-NEXT: lxv vs0, 48(r4) ; CHECK-BE-NEXT: lxv vs1, 32(r4) +; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2 +; CHECK-BE-NEXT: 
xxmrghd vs2, vs3, vs2 ; CHECK-BE-NEXT: xvcvdpuxws v2, vs4 ; CHECK-BE-NEXT: xvcvdpuxws v3, vs2 ; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0 @@ -227,23 +227,23 @@ define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs6, 0(r4) ; CHECK-P9-NEXT: lxv vs7, 16(r4) -; CHECK-P9-NEXT: xxmrgld vs8, vs7, vs6 -; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-P9-NEXT: lxv vs4, 32(r4) ; CHECK-P9-NEXT: lxv vs5, 48(r4) +; CHECK-P9-NEXT: xxmrgld vs8, vs7, vs6 +; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4 ; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4 -; CHECK-P9-NEXT: xvcvdpuxws v2, vs8 -; CHECK-P9-NEXT: xvcvdpuxws v3, vs6 ; CHECK-P9-NEXT: lxv vs2, 64(r4) ; CHECK-P9-NEXT: lxv vs3, 80(r4) +; CHECK-P9-NEXT: lxv vs0, 96(r4) +; CHECK-P9-NEXT: lxv vs1, 112(r4) +; CHECK-P9-NEXT: xvcvdpuxws v2, vs8 +; CHECK-P9-NEXT: xvcvdpuxws v3, vs6 ; CHECK-P9-NEXT: xvcvdpuxws v4, vs7 ; CHECK-P9-NEXT: vmrgew v2, v3, v2 ; CHECK-P9-NEXT: xvcvdpuxws v3, vs4 ; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2 ; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2 -; CHECK-P9-NEXT: lxv vs0, 96(r4) -; CHECK-P9-NEXT: lxv vs1, 112(r4) ; CHECK-P9-NEXT: stxv v2, 0(r3) ; CHECK-P9-NEXT: xvcvdpuxws v5, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0 @@ -263,23 +263,23 @@ define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs6, 16(r4) ; CHECK-BE-NEXT: lxv vs7, 0(r4) -; CHECK-BE-NEXT: xxmrgld vs8, vs7, vs6 -; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-BE-NEXT: lxv vs4, 48(r4) ; CHECK-BE-NEXT: lxv vs5, 32(r4) +; CHECK-BE-NEXT: xxmrgld vs8, vs7, vs6 +; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-BE-NEXT: xxmrgld vs7, vs5, vs4 ; CHECK-BE-NEXT: xxmrghd vs4, vs5, vs4 -; CHECK-BE-NEXT: xvcvdpuxws v2, vs8 -; CHECK-BE-NEXT: xvcvdpuxws v3, vs6 ; CHECK-BE-NEXT: lxv vs2, 80(r4) ; CHECK-BE-NEXT: lxv vs3, 64(r4) +; CHECK-BE-NEXT: lxv vs0, 112(r4) +; CHECK-BE-NEXT: lxv vs1, 96(r4) +; 
CHECK-BE-NEXT: xvcvdpuxws v2, vs8 +; CHECK-BE-NEXT: xvcvdpuxws v3, vs6 ; CHECK-BE-NEXT: xvcvdpuxws v4, vs7 ; CHECK-BE-NEXT: vmrgew v2, v3, v2 ; CHECK-BE-NEXT: xvcvdpuxws v3, vs4 ; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2 ; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2 -; CHECK-BE-NEXT: lxv vs0, 112(r4) -; CHECK-BE-NEXT: lxv vs1, 96(r4) ; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: xvcvdpuxws v5, vs2 ; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0 @@ -421,10 +421,10 @@ define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs2, 0(r4) ; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2 -; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: lxv vs1, 48(r4) +; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2 +; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-P9-NEXT: xvcvdpsxws v2, vs4 ; CHECK-P9-NEXT: xvcvdpsxws v3, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0 @@ -441,10 +441,10 @@ define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) ; CHECK-BE-NEXT: lxv vs3, 0(r4) -; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2 -; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-BE-NEXT: lxv vs0, 48(r4) ; CHECK-BE-NEXT: lxv vs1, 32(r4) +; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2 +; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2 ; CHECK-BE-NEXT: xvcvdpsxws v2, vs4 ; CHECK-BE-NEXT: xvcvdpsxws v3, vs2 ; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0 @@ -519,23 +519,23 @@ define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <1 ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs6, 0(r4) ; CHECK-P9-NEXT: lxv vs7, 16(r4) -; CHECK-P9-NEXT: xxmrgld vs8, vs7, vs6 -; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-P9-NEXT: lxv vs4, 32(r4) ; CHECK-P9-NEXT: lxv vs5, 48(r4) +; CHECK-P9-NEXT: xxmrgld vs8, vs7, vs6 +; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4 ; CHECK-P9-NEXT: xxmrghd vs4, vs5, 
vs4 -; CHECK-P9-NEXT: xvcvdpsxws v2, vs8 -; CHECK-P9-NEXT: xvcvdpsxws v3, vs6 ; CHECK-P9-NEXT: lxv vs2, 64(r4) ; CHECK-P9-NEXT: lxv vs3, 80(r4) +; CHECK-P9-NEXT: lxv vs0, 96(r4) +; CHECK-P9-NEXT: lxv vs1, 112(r4) +; CHECK-P9-NEXT: xvcvdpsxws v2, vs8 +; CHECK-P9-NEXT: xvcvdpsxws v3, vs6 ; CHECK-P9-NEXT: xvcvdpsxws v4, vs7 ; CHECK-P9-NEXT: vmrgew v2, v3, v2 ; CHECK-P9-NEXT: xvcvdpsxws v3, vs4 ; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2 ; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2 -; CHECK-P9-NEXT: lxv vs0, 96(r4) -; CHECK-P9-NEXT: lxv vs1, 112(r4) ; CHECK-P9-NEXT: stxv v2, 0(r3) ; CHECK-P9-NEXT: xvcvdpsxws v5, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0 @@ -555,23 +555,23 @@ define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <1 ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs6, 16(r4) ; CHECK-BE-NEXT: lxv vs7, 0(r4) -; CHECK-BE-NEXT: xxmrgld vs8, vs7, vs6 -; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-BE-NEXT: lxv vs4, 48(r4) ; CHECK-BE-NEXT: lxv vs5, 32(r4) +; CHECK-BE-NEXT: xxmrgld vs8, vs7, vs6 +; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6 ; CHECK-BE-NEXT: xxmrgld vs7, vs5, vs4 ; CHECK-BE-NEXT: xxmrghd vs4, vs5, vs4 -; CHECK-BE-NEXT: xvcvdpsxws v2, vs8 -; CHECK-BE-NEXT: xvcvdpsxws v3, vs6 ; CHECK-BE-NEXT: lxv vs2, 80(r4) ; CHECK-BE-NEXT: lxv vs3, 64(r4) +; CHECK-BE-NEXT: lxv vs0, 112(r4) +; CHECK-BE-NEXT: lxv vs1, 96(r4) +; CHECK-BE-NEXT: xvcvdpsxws v2, vs8 +; CHECK-BE-NEXT: xvcvdpsxws v3, vs6 ; CHECK-BE-NEXT: xvcvdpsxws v4, vs7 ; CHECK-BE-NEXT: vmrgew v2, v3, v2 ; CHECK-BE-NEXT: xvcvdpsxws v3, vs4 ; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2 ; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2 -; CHECK-BE-NEXT: lxv vs0, 112(r4) -; CHECK-BE-NEXT: lxv vs1, 96(r4) ; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: xvcvdpsxws v5, vs2 ; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll index fd28d9a1afdc3..603572b19e1be 100644 --- 
a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -96,10 +96,10 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 @@ -121,10 +121,10 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -205,12 +205,12 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 @@ -249,12 +249,12 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; 
CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -398,16 +398,16 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs7, 0(r3) -; CHECK-P9-NEXT: xscvdpsxws f8, f7 -; CHECK-P9-NEXT: xxswapd vs7, vs7 -; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) ; CHECK-P9-NEXT: lxv vs1, 96(r3) +; CHECK-P9-NEXT: xscvdpsxws f8, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) +; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f7 @@ -481,16 +481,16 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: xscvdpsxws f8, f7 -; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: lxv vs6, 96(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) ; CHECK-BE-NEXT: lxv vs5, 80(r3) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -669,10 +669,10 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd 
vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 @@ -694,10 +694,10 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -778,12 +778,12 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f3 @@ -822,12 +822,12 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 @@ -971,16 +971,16 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P9-LABEL: test16elt_signed: ; 
CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs7, 0(r3) -; CHECK-P9-NEXT: xscvdpsxws f8, f7 -; CHECK-P9-NEXT: xxswapd vs7, vs7 -; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) ; CHECK-P9-NEXT: lxv vs1, 96(r3) +; CHECK-P9-NEXT: xscvdpsxws f8, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) +; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 ; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f7 @@ -1054,16 +1054,16 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: xscvdpsxws f8, f7 -; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: lxv vs6, 96(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) ; CHECK-BE-NEXT: lxv vs5, 80(r3) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll index 5ecd34941b39c..60fb0c29b5588 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -40,9 +40,9 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 2 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 -; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextuhrx r3, r3, v2 ; CHECK-P9-NEXT: clrlwi r3, r3, 16 +; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; 
CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 @@ -98,9 +98,9 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: vperm v2, v2, v4, v3 ; CHECK-BE-NEXT: xvcvuxwsp v2, v2 ; CHECK-BE-NEXT: blr @@ -137,9 +137,9 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -190,9 +190,9 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-P9-NEXT: lxv v2, 16(r4) ; CHECK-P9-NEXT: lxv v3, 0(r4) ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-P9-NEXT: xxlxor v5, v5, v5 ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-P9-NEXT: lxvx v4, 0, r4 -; CHECK-P9-NEXT: xxlxor v5, v5, v5 ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-P9-NEXT: vperm v0, v5, v3, v4 @@ -215,9 +215,9 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-BE-NEXT: lxv v2, 16(r4) ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-BE-NEXT: vperm v0, v3, v5, v4 @@ -272,9 +272,9 @@ define i64 
@test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: mtfprwa f0, r3 ; CHECK-P9-NEXT: li r3, 2 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 -; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextuhrx r3, r3, v2 ; CHECK-P9-NEXT: extsh r3, r3 +; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwa f0, r3 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 @@ -375,9 +375,9 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: vmrghh v2, v2, v2 ; CHECK-BE-NEXT: vextsh2w v3, v3 @@ -432,10 +432,10 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-P9-NEXT: vmrglh v4, v3, v3 ; CHECK-P9-NEXT: vmrghh v3, v3, v3 ; CHECK-P9-NEXT: vextsh2w v3, v3 +; CHECK-P9-NEXT: vextsh2w v4, v4 ; CHECK-P9-NEXT: xvcvsxwsp vs1, v3 ; CHECK-P9-NEXT: vmrglh v3, v2, v2 ; CHECK-P9-NEXT: vmrghh v2, v2, v2 -; CHECK-P9-NEXT: vextsh2w v4, v4 ; CHECK-P9-NEXT: xvcvsxwsp vs0, v4 ; CHECK-P9-NEXT: vextsh2w v3, v3 ; CHECK-P9-NEXT: vextsh2w v2, v2 @@ -452,9 +452,9 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-BE-NEXT: lxv v2, 16(r4) ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: vperm v0, v5, v3, v4 ; CHECK-BE-NEXT: vperm v4, v5, v2, v4 ; CHECK-BE-NEXT: vmrghh v3, v3, v3 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll index ea8ede3af22a0..903b492e33963 100644 --- 
a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -25,9 +25,9 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrws v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r3 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: vperm v2, v4, v2, v3 ; CHECK-P9-NEXT: xvcvuxddp v2, v2 ; CHECK-P9-NEXT: blr @@ -36,9 +36,9 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrws v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: vperm v2, v2, v4, v3 ; CHECK-BE-NEXT: xvcvuxddp v2, v2 ; CHECK-BE-NEXT: blr @@ -74,9 +74,9 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -92,9 +92,9 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI1_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -152,9 +152,9 @@ define void 
@test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i16 ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -181,9 +181,9 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i16 ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -276,9 +276,9 @@ define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x ; CHECK-P9-NEXT: lxv v2, 16(r4) ; CHECK-P9-NEXT: lxv v3, 0(r4) ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-P9-NEXT: xxlxor v5, v5, v5 ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-P9-NEXT: lxvx v4, 0, r4 -; CHECK-P9-NEXT: xxlxor v5, v5, v5 ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-P9-NEXT: vperm v0, v5, v3, v4 @@ -319,9 +319,9 @@ define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x ; CHECK-BE-NEXT: lxv v2, 16(r4) ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-BE-NEXT: vperm v0, v3, v5, v4 @@ -459,13 +459,13 @@ define void @test4elt_signed(<4 x double>* noalias nocapture 
sret %agg.result, i ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v3, v3, v3 -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha -; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l +; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 @@ -564,12 +564,12 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, < ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 @@ -680,8 +680,8 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-P9-NEXT: addi r5, r5, .LCPI7_0@toc@l ; CHECK-P9-NEXT: lxv v2, 0(r4) +; CHECK-P9-NEXT: addi r5, r5, .LCPI7_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r5 ; CHECK-P9-NEXT: addis r5, r2, .LCPI7_1@toc@ha ; CHECK-P9-NEXT: addi r5, r5, .LCPI7_1@toc@l @@ -700,16 +700,17 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-P9-NEXT: xvcvsxddp vs1, v4 ; CHECK-P9-NEXT: vperm v4, v2, v2, v0 ; CHECK-P9-NEXT: vperm v2, v2, v2, v1 +; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: vextsh2d v4, v4 ; 
CHECK-P9-NEXT: xvcvsxddp vs2, v4 ; CHECK-P9-NEXT: lxv v4, 16(r4) +; CHECK-P9-NEXT: stxv vs1, 16(r3) ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs3, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v3 ; CHECK-P9-NEXT: stxv vs2, 32(r3) ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: stxv vs3, 48(r3) -; CHECK-P9-NEXT: stxv vs1, 16(r3) ; CHECK-P9-NEXT: xvcvsxddp vs4, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v5 ; CHECK-P9-NEXT: vextsh2d v2, v2 @@ -720,60 +721,59 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-P9-NEXT: xvcvsxddp vs6, v2 ; CHECK-P9-NEXT: vperm v2, v4, v4, v1 ; CHECK-P9-NEXT: stxv vs5, 80(r3) -; CHECK-P9-NEXT: stxv vs6, 96(r3) ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs7, v2 +; CHECK-P9-NEXT: stxv vs6, 96(r3) ; CHECK-P9-NEXT: stxv vs7, 112(r3) -; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r5 ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: lxv v1, 16(r4) +; CHECK-BE-NEXT: xxlxor v5, v5, v5 +; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha +; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l +; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r5 ; CHECK-BE-NEXT: addis r5, r2, .LCPI7_1@toc@ha ; CHECK-BE-NEXT: addi r5, r5, .LCPI7_1@toc@l -; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha -; CHECK-BE-NEXT: xxlxor v5, v5, v5 -; CHECK-BE-NEXT: vperm v0, v5, v4, v2 ; CHECK-BE-NEXT: lxvx v3, 0, r5 +; CHECK-BE-NEXT: vperm v0, v5, v4, v2 ; CHECK-BE-NEXT: vperm v2, v5, v1, v2 ; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l ; CHECK-BE-NEXT: vextsh2d v0, v0 ; CHECK-BE-NEXT: xvcvsxddp vs2, v2 ; CHECK-BE-NEXT: vperm v2, v5, v1, v3 +; CHECK-BE-NEXT: xvcvsxddp vs0, v0 +; CHECK-BE-NEXT: vperm v0, v5, v4, v3 ; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: stxv vs2, 
80(r3) +; CHECK-BE-NEXT: vextsh2d v0, v0 ; CHECK-BE-NEXT: xvcvsxddp vs3, v2 ; CHECK-BE-NEXT: lxvx v2, 0, r4 -; CHECK-BE-NEXT: xvcvsxddp vs0, v0 -; CHECK-BE-NEXT: vperm v0, v5, v4, v3 -; CHECK-BE-NEXT: vperm v3, v4, v4, v2 ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha -; CHECK-BE-NEXT: vextsh2d v0, v0 ; CHECK-BE-NEXT: xvcvsxddp vs1, v0 +; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l +; CHECK-BE-NEXT: stxv vs2, 80(r3) +; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v4, v4, v2 +; CHECK-BE-NEXT: vperm v2, v1, v1, v2 +; CHECK-BE-NEXT: stxv vs3, 112(r3) ; CHECK-BE-NEXT: stxv vs1, 48(r3) ; CHECK-BE-NEXT: vextsh2d v3, v3 -; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l +; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs4, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: vperm v2, v1, v1, v2 -; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs6, v2 -; CHECK-BE-NEXT: vperm v2, v1, v1, v3 ; CHECK-BE-NEXT: vperm v4, v4, v4, v3 +; CHECK-BE-NEXT: vperm v2, v1, v1, v3 +; CHECK-BE-NEXT: stxv vs6, 64(r3) +; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: vextsh2d v4, v4 ; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: xvcvsxddp vs7, v2 ; CHECK-BE-NEXT: xvcvsxddp vs5, v4 -; CHECK-BE-NEXT: stxv vs3, 112(r3) -; CHECK-BE-NEXT: stxv vs6, 64(r3) -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: xvcvsxddp vs7, v2 ; CHECK-BE-NEXT: stxv vs7, 96(r3) ; CHECK-BE-NEXT: stxv vs5, 32(r3) ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll index 18957e6b59a2f..71a1718ab8776 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll @@ -106,8 +106,8 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i32 ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1 ; 
CHECK-P9-NEXT: lxv vs0, 16(r4) +; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1 ; CHECK-P9-NEXT: xvcvuxwdp vs2, v2 ; CHECK-P9-NEXT: xxmrghw v2, vs1, vs1 ; CHECK-P9-NEXT: xvcvuxwdp vs1, v2 @@ -124,8 +124,8 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i32 ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1 ; CHECK-BE-NEXT: lxv vs0, 16(r4) +; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1 ; CHECK-BE-NEXT: xvcvuxwdp vs2, v2 ; CHECK-BE-NEXT: xxmrglw v2, vs1, vs1 ; CHECK-BE-NEXT: xvcvuxwdp vs1, v2 @@ -196,12 +196,12 @@ define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs0, 0(r4) -; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0 ; CHECK-P9-NEXT: lxv vs2, 16(r4) +; CHECK-P9-NEXT: lxv vs5, 32(r4) ; CHECK-P9-NEXT: lxv vs4, 48(r4) +; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0 ; CHECK-P9-NEXT: xvcvuxwdp vs1, v2 ; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0 -; CHECK-P9-NEXT: lxv vs5, 32(r4) ; CHECK-P9-NEXT: xvcvuxwdp vs0, v2 ; CHECK-P9-NEXT: xxmrglw v2, vs2, vs2 ; CHECK-P9-NEXT: xvcvuxwdp vs3, v2 @@ -228,12 +228,12 @@ define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-BE-NEXT: lxv vs2, 16(r4) +; CHECK-BE-NEXT: lxv vs5, 32(r4) ; CHECK-BE-NEXT: lxv vs4, 48(r4) +; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-BE-NEXT: xvcvuxwdp vs1, v2 ; CHECK-BE-NEXT: xxmrglw v2, vs0, vs0 -; CHECK-BE-NEXT: lxv vs5, 32(r4) ; CHECK-BE-NEXT: xvcvuxwdp vs0, v2 ; CHECK-BE-NEXT: xxmrghw v2, vs2, vs2 ; CHECK-BE-NEXT: xvcvuxwdp vs3, v2 @@ -360,8 +360,8 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, < ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1 ; 
CHECK-P9-NEXT: lxv vs0, 16(r4) +; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1 ; CHECK-P9-NEXT: xvcvsxwdp vs2, v2 ; CHECK-P9-NEXT: xxmrghw v2, vs1, vs1 ; CHECK-P9-NEXT: xvcvsxwdp vs1, v2 @@ -378,8 +378,8 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, < ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1 ; CHECK-BE-NEXT: lxv vs0, 16(r4) +; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1 ; CHECK-BE-NEXT: xvcvsxwdp vs2, v2 ; CHECK-BE-NEXT: xxmrglw v2, vs1, vs1 ; CHECK-BE-NEXT: xvcvsxwdp vs1, v2 @@ -450,12 +450,12 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs0, 0(r4) -; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0 ; CHECK-P9-NEXT: lxv vs2, 16(r4) +; CHECK-P9-NEXT: lxv vs5, 32(r4) ; CHECK-P9-NEXT: lxv vs4, 48(r4) +; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0 ; CHECK-P9-NEXT: xvcvsxwdp vs1, v2 ; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0 -; CHECK-P9-NEXT: lxv vs5, 32(r4) ; CHECK-P9-NEXT: xvcvsxwdp vs0, v2 ; CHECK-P9-NEXT: xxmrglw v2, vs2, vs2 ; CHECK-P9-NEXT: xvcvsxwdp vs3, v2 @@ -482,12 +482,12 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-BE-NEXT: lxv vs2, 16(r4) +; CHECK-BE-NEXT: lxv vs5, 32(r4) ; CHECK-BE-NEXT: lxv vs4, 48(r4) +; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-BE-NEXT: xvcvsxwdp vs1, v2 ; CHECK-BE-NEXT: xxmrglw v2, vs0, vs0 -; CHECK-BE-NEXT: lxv vs5, 32(r4) ; CHECK-BE-NEXT: xvcvsxwdp vs0, v2 ; CHECK-BE-NEXT: xxmrghw v2, vs2, vs2 ; CHECK-BE-NEXT: xvcvsxwdp vs3, v2 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll index f152c2b008ff2..2e757152e4285 100644 --- 
a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll @@ -74,8 +74,8 @@ define <4 x float> @test4elt(<4 x i64>* nocapture readonly) local_unnamed_addr # ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: xvcvuxdsp vs0, v3 ; CHECK-P9-NEXT: lxv v2, 16(r3) +; CHECK-P9-NEXT: xvcvuxdsp vs0, v3 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v2 ; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 @@ -85,8 +85,8 @@ define <4 x float> @test4elt(<4 x i64>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v3, 16(r3) -; CHECK-BE-NEXT: xvcvuxdsp vs0, v3 ; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: xvcvuxdsp vs0, v3 ; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v2 ; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3 @@ -129,14 +129,14 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i64> ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv v5, 0(r4) -; CHECK-P9-NEXT: xvcvuxdsp vs0, v5 ; CHECK-P9-NEXT: lxv v4, 16(r4) +; CHECK-P9-NEXT: lxv v3, 32(r4) +; CHECK-P9-NEXT: lxv v2, 48(r4) +; CHECK-P9-NEXT: xvcvuxdsp vs0, v5 ; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v4 -; CHECK-P9-NEXT: lxv v3, 32(r4) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v3 -; CHECK-P9-NEXT: lxv v2, 48(r4) ; CHECK-P9-NEXT: vpkudum v3, v4, v5 ; CHECK-P9-NEXT: stxv v3, 0(r3) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 @@ -149,14 +149,14 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i64> ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvcvuxdsp vs0, v5 ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 48(r4) +; CHECK-BE-NEXT: lxv v2, 32(r4) +; CHECK-BE-NEXT: xvcvuxdsp vs0, v5 ; 
CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v4 -; CHECK-BE-NEXT: lxv v3, 48(r4) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v3 -; CHECK-BE-NEXT: lxv v2, 32(r4) ; CHECK-BE-NEXT: vpkudum v3, v4, v5 ; CHECK-BE-NEXT: stxv v3, 0(r3) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 @@ -227,30 +227,30 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv v7, 0(r4) -; CHECK-P9-NEXT: xvcvuxdsp vs0, v7 ; CHECK-P9-NEXT: lxv v6, 16(r4) +; CHECK-P9-NEXT: lxv v1, 32(r4) +; CHECK-P9-NEXT: lxv v0, 48(r4) +; CHECK-P9-NEXT: xvcvuxdsp vs0, v7 +; CHECK-P9-NEXT: lxv v5, 64(r4) +; CHECK-P9-NEXT: lxv v4, 80(r4) +; CHECK-P9-NEXT: lxv v3, 96(r4) +; CHECK-P9-NEXT: lxv v2, 112(r4) ; CHECK-P9-NEXT: xxsldwi v7, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v6 -; CHECK-P9-NEXT: lxv v1, 32(r4) ; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v1 -; CHECK-P9-NEXT: lxv v0, 48(r4) ; CHECK-P9-NEXT: vpkudum v1, v6, v7 +; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v0 -; CHECK-P9-NEXT: lxv v5, 64(r4) -; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v5 -; CHECK-P9-NEXT: lxv v4, 80(r4) ; CHECK-P9-NEXT: vpkudum v0, v0, v6 ; CHECK-P9-NEXT: stxv v0, 16(r3) ; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v4 -; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvuxdsp vs0, v3 -; CHECK-P9-NEXT: lxv v2, 112(r4) ; CHECK-P9-NEXT: vpkudum v4, v4, v5 ; CHECK-P9-NEXT: stxv v4, 32(r3) ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 @@ -263,30 +263,30 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v7, 16(r4) -; CHECK-BE-NEXT: xvcvuxdsp vs0, v7 ; 
CHECK-BE-NEXT: lxv v6, 0(r4) +; CHECK-BE-NEXT: lxv v1, 48(r4) +; CHECK-BE-NEXT: lxv v0, 32(r4) +; CHECK-BE-NEXT: xvcvuxdsp vs0, v7 +; CHECK-BE-NEXT: lxv v5, 80(r4) +; CHECK-BE-NEXT: lxv v4, 64(r4) +; CHECK-BE-NEXT: lxv v3, 112(r4) +; CHECK-BE-NEXT: lxv v2, 96(r4) ; CHECK-BE-NEXT: xxsldwi v7, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v6 -; CHECK-BE-NEXT: lxv v1, 48(r4) ; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v1 -; CHECK-BE-NEXT: lxv v0, 32(r4) ; CHECK-BE-NEXT: vpkudum v1, v6, v7 +; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v0 -; CHECK-BE-NEXT: lxv v5, 80(r4) -; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v5 -; CHECK-BE-NEXT: lxv v4, 64(r4) ; CHECK-BE-NEXT: vpkudum v0, v0, v6 ; CHECK-BE-NEXT: stxv v0, 16(r3) ; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v4 -; CHECK-BE-NEXT: lxv v3, 112(r4) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvuxdsp vs0, v3 -; CHECK-BE-NEXT: lxv v2, 96(r4) ; CHECK-BE-NEXT: vpkudum v4, v4, v5 ; CHECK-BE-NEXT: stxv v4, 32(r3) ; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3 @@ -367,8 +367,8 @@ define <4 x float> @test4elt_signed(<4 x i64>* nocapture readonly) local_unnamed ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: xvcvsxdsp vs0, v3 ; CHECK-P9-NEXT: lxv v2, 16(r3) +; CHECK-P9-NEXT: xvcvsxdsp vs0, v3 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v2 ; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3 @@ -378,8 +378,8 @@ define <4 x float> @test4elt_signed(<4 x i64>* nocapture readonly) local_unnamed ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v3, 16(r3) -; CHECK-BE-NEXT: xvcvsxdsp vs0, v3 ; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: xvcvsxdsp vs0, v3 ; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v2 ; 
CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3 @@ -422,14 +422,14 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv v5, 0(r4) -; CHECK-P9-NEXT: xvcvsxdsp vs0, v5 ; CHECK-P9-NEXT: lxv v4, 16(r4) +; CHECK-P9-NEXT: lxv v3, 32(r4) +; CHECK-P9-NEXT: lxv v2, 48(r4) +; CHECK-P9-NEXT: xvcvsxdsp vs0, v5 ; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v4 -; CHECK-P9-NEXT: lxv v3, 32(r4) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v3 -; CHECK-P9-NEXT: lxv v2, 48(r4) ; CHECK-P9-NEXT: vpkudum v3, v4, v5 ; CHECK-P9-NEXT: stxv v3, 0(r3) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 @@ -442,14 +442,14 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvcvsxdsp vs0, v5 ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 48(r4) +; CHECK-BE-NEXT: lxv v2, 32(r4) +; CHECK-BE-NEXT: xvcvsxdsp vs0, v5 ; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v4 -; CHECK-BE-NEXT: lxv v3, 48(r4) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v3 -; CHECK-BE-NEXT: lxv v2, 32(r4) ; CHECK-BE-NEXT: vpkudum v3, v4, v5 ; CHECK-BE-NEXT: stxv v3, 0(r3) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 @@ -520,30 +520,30 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv v7, 0(r4) -; CHECK-P9-NEXT: xvcvsxdsp vs0, v7 ; CHECK-P9-NEXT: lxv v6, 16(r4) +; CHECK-P9-NEXT: lxv v1, 32(r4) +; CHECK-P9-NEXT: lxv v0, 48(r4) +; CHECK-P9-NEXT: xvcvsxdsp vs0, v7 +; CHECK-P9-NEXT: lxv v5, 64(r4) +; CHECK-P9-NEXT: lxv v4, 80(r4) +; CHECK-P9-NEXT: lxv v3, 96(r4) +; CHECK-P9-NEXT: lxv v2, 112(r4) ; CHECK-P9-NEXT: xxsldwi v7, vs0, vs0, 3 ; CHECK-P9-NEXT: 
xvcvsxdsp vs0, v6 -; CHECK-P9-NEXT: lxv v1, 32(r4) ; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v1 -; CHECK-P9-NEXT: lxv v0, 48(r4) ; CHECK-P9-NEXT: vpkudum v1, v6, v7 +; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v0 -; CHECK-P9-NEXT: lxv v5, 64(r4) -; CHECK-P9-NEXT: stxv v1, 0(r3) ; CHECK-P9-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v5 -; CHECK-P9-NEXT: lxv v4, 80(r4) ; CHECK-P9-NEXT: vpkudum v0, v0, v6 ; CHECK-P9-NEXT: stxv v0, 16(r3) ; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v4 -; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-P9-NEXT: xvcvsxdsp vs0, v3 -; CHECK-P9-NEXT: lxv v2, 112(r4) ; CHECK-P9-NEXT: vpkudum v4, v4, v5 ; CHECK-P9-NEXT: stxv v4, 32(r3) ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 @@ -556,30 +556,30 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v7, 16(r4) -; CHECK-BE-NEXT: xvcvsxdsp vs0, v7 ; CHECK-BE-NEXT: lxv v6, 0(r4) +; CHECK-BE-NEXT: lxv v1, 48(r4) +; CHECK-BE-NEXT: lxv v0, 32(r4) +; CHECK-BE-NEXT: xvcvsxdsp vs0, v7 +; CHECK-BE-NEXT: lxv v5, 80(r4) +; CHECK-BE-NEXT: lxv v4, 64(r4) +; CHECK-BE-NEXT: lxv v3, 112(r4) +; CHECK-BE-NEXT: lxv v2, 96(r4) ; CHECK-BE-NEXT: xxsldwi v7, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v6 -; CHECK-BE-NEXT: lxv v1, 48(r4) ; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v1 -; CHECK-BE-NEXT: lxv v0, 32(r4) ; CHECK-BE-NEXT: vpkudum v1, v6, v7 +; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v0 -; CHECK-BE-NEXT: lxv v5, 80(r4) -; CHECK-BE-NEXT: stxv v1, 0(r3) ; CHECK-BE-NEXT: xxsldwi v0, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v5 -; CHECK-BE-NEXT: lxv v4, 64(r4) ; CHECK-BE-NEXT: vpkudum v0, v0, v6 ; CHECK-BE-NEXT: stxv v0, 16(r3) ; 
CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v4 -; CHECK-BE-NEXT: lxv v3, 112(r4) ; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3 ; CHECK-BE-NEXT: xvcvsxdsp vs0, v3 -; CHECK-BE-NEXT: lxv v2, 96(r4) ; CHECK-BE-NEXT: vpkudum v4, v4, v5 ; CHECK-BE-NEXT: stxv v4, 32(r3) ; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll index f2cb9f5f45fbb..1962ffa653112 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -40,9 +40,9 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 -; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextubrx r3, r3, v2 ; CHECK-P9-NEXT: clrlwi r3, r3, 24 +; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 @@ -93,9 +93,9 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 { ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrws v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r3 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: vperm v2, v4, v2, v3 ; CHECK-P9-NEXT: xvcvuxwsp v2, v2 ; CHECK-P9-NEXT: blr @@ -104,9 +104,9 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrws v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: vperm v2, v2, v4, v3 ; CHECK-BE-NEXT: xvcvuxwsp v2, v2 ; CHECK-BE-NEXT: blr @@ -140,9 +140,9 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, 
i64 %a.co ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -158,9 +158,9 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.co ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -214,9 +214,9 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -243,9 +243,9 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -305,9 +305,9 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: mtfprwa f0, r3 ; CHECK-P9-NEXT: li r3, 
1 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 -; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextubrx r3, r3, v2 ; CHECK-P9-NEXT: extsb r3, r3 +; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3 ; CHECK-P9-NEXT: mtfprwa f0, r3 ; CHECK-P9-NEXT: xscvsxdsp f0, f0 @@ -432,13 +432,13 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6 ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v3, v3, v3 -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha -; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l +; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 @@ -531,12 +531,12 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_1@toc@l +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll index 268fc9b7d4cc8..c68fa812ffe54 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -25,9 +25,9 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; 
CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrws v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r3 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: vperm v2, v4, v2, v3 ; CHECK-P9-NEXT: xvcvuxddp v2, v2 ; CHECK-P9-NEXT: blr @@ -36,9 +36,9 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrws v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: vperm v2, v2, v4, v3 ; CHECK-BE-NEXT: xvcvuxddp v2, v2 ; CHECK-BE-NEXT: blr @@ -74,9 +74,9 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.c ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrws v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -92,9 +92,9 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.c ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrws v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI1_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -154,9 +154,9 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, 
v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -184,9 +184,9 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -288,9 +288,9 @@ define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-P9-NEXT: vperm v3, v4, v2, v3 @@ -341,9 +341,9 @@ define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v4, v3 @@ -492,13 +492,13 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrws v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, 
.LCPI5_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v3, v3, v3 -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha -; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l +; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 @@ -600,13 +600,13 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: xxlxor v4, v4, v4 -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha -; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 +; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_2@toc@ha @@ -787,12 +787,12 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: xxlxor v3, v3, v3 -; CHECK-BE-NEXT: vperm v4, v3, v2, v4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_1@toc@l +; CHECK-BE-NEXT: vperm v4, v3, v2, v4 ; CHECK-BE-NEXT: vextsb2d v4, v4 ; CHECK-BE-NEXT: xvcvsxddp vs0, v4 ; CHECK-BE-NEXT: lxvx v4, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 29955dc17f674..cefa662c0eae5 100644 --- 
a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -305,8 +305,8 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 { ; PC64LE9-NEXT: addis 3, 2, .LCPI6_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI6_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI6_2@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload @@ -390,24 +390,24 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 { ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI7_2@toc@ha ; PC64LE9-NEXT: fmr 30, 1 -; PC64LE9-NEXT: lfs 1, .LCPI7_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI7_2@toc@l(3) ; PC64LE9-NEXT: bl fmodf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI7_3@toc@ha ; PC64LE9-NEXT: fmr 29, 1 -; PC64LE9-NEXT: lfs 1, .LCPI7_3@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI7_3@toc@l(3) ; PC64LE9-NEXT: bl fmodf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI7_4@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI7_4@toc@l +; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: addis 3, 2, .LCPI7_4@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI7_4@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -478,27 +478,27 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI8_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_1@toc@ha 
-; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill ; PC64LE9-NEXT: lfs 31, .LCPI8_1@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI8_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI8_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI8_2@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI8_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfs 1, .LCPI8_3@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 @@ -580,34 +580,34 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI9_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI9_1@toc@ha -; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill ; PC64LE9-NEXT: lfs 31, .LCPI9_1@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI9_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI9_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI9_2@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI9_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; 
PC64LE9-NEXT: lfs 1, .LCPI9_3@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI9_4@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI9_4@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI9_4@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload @@ -935,23 +935,23 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 { ; PC64LE9-LABEL: constrained_vector_fadd_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI17_0@toc@ha +; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: lfs 0, .LCPI17_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI17_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI17_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI17_2@toc@ha -; PC64LE9-NEXT: xsaddsp 2, 0, 2 ; PC64LE9-NEXT: lfs 3, .LCPI17_2@toc@l(3) -; PC64LE9-NEXT: xxlxor 1, 1, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI17_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI17_3@toc@l ; PC64LE9-NEXT: xsaddsp 1, 0, 1 +; PC64LE9-NEXT: lxvx 36, 0, 3 +; PC64LE9-NEXT: xsaddsp 2, 0, 2 ; PC64LE9-NEXT: xsaddsp 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: addis 3, 2, .LCPI17_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI17_3@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -990,9 +990,9 @@ define <3 x double> @constrained_vector_fadd_v3f64() #0 { ; PC64LE9-LABEL: constrained_vector_fadd_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI18_0@toc@ha +; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: lfd 0, .LCPI18_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI18_1@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI18_1@toc@l ; PC64LE9-NEXT: 
xsadddp 3, 0, 1 ; PC64LE9-NEXT: lxvx 0, 0, 3 @@ -1147,23 +1147,23 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 { ; PC64LE9-LABEL: constrained_vector_fsub_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI22_0@toc@ha +; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: lfs 0, .LCPI22_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI22_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI22_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI22_2@toc@ha -; PC64LE9-NEXT: xssubsp 2, 0, 2 ; PC64LE9-NEXT: lfs 3, .LCPI22_2@toc@l(3) -; PC64LE9-NEXT: xxlxor 1, 1, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI22_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI22_3@toc@l ; PC64LE9-NEXT: xssubsp 1, 0, 1 +; PC64LE9-NEXT: lxvx 36, 0, 3 +; PC64LE9-NEXT: xssubsp 2, 0, 2 ; PC64LE9-NEXT: xssubsp 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: addis 3, 2, .LCPI22_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI22_3@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -1202,9 +1202,9 @@ define <3 x double> @constrained_vector_fsub_v3f64() #0 { ; PC64LE9-LABEL: constrained_vector_fsub_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI23_0@toc@ha +; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: lfd 0, .LCPI23_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI23_1@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI23_1@toc@l ; PC64LE9-NEXT: xssubdp 3, 0, 1 ; PC64LE9-NEXT: lxvx 0, 0, 3 @@ -1534,8 +1534,8 @@ define <2 x double> @constrained_vector_pow_v2f64() #0 { ; PC64LE9-NEXT: addis 3, 2, .LCPI31_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI31_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfd 1, .LCPI31_2@toc@l(3) ; PC64LE9-NEXT: bl pow ; 
PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload @@ -1619,24 +1619,24 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 { ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI32_2@toc@ha ; PC64LE9-NEXT: fmr 30, 1 -; PC64LE9-NEXT: lfs 1, .LCPI32_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI32_2@toc@l(3) ; PC64LE9-NEXT: bl powf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI32_3@toc@ha ; PC64LE9-NEXT: fmr 29, 1 -; PC64LE9-NEXT: lfs 1, .LCPI32_3@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfs 1, .LCPI32_3@toc@l(3) ; PC64LE9-NEXT: bl powf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI32_4@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI32_4@toc@l +; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: addis 3, 2, .LCPI32_4@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI32_4@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -1707,27 +1707,27 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI33_0@toc@ha +; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI33_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI33_1@toc@ha -; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill ; PC64LE9-NEXT: lfs 31, .LCPI33_1@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI33_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI33_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfd 1, .LCPI33_2@toc@l(3) 
; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI33_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfd 1, .LCPI33_3@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 @@ -1809,34 +1809,34 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI34_0@toc@ha +; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI34_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI34_1@toc@ha -; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill ; PC64LE9-NEXT: lfs 31, .LCPI34_1@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI34_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI34_2@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfd 1, .LCPI34_2@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI34_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfd 1, .LCPI34_3@toc@l(3) -; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI34_4@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI34_4@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: lfd 1, .LCPI34_4@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload @@ -1882,8 
+1882,8 @@ define <1 x float> @constrained_vector_powi_v1f32() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -32(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI35_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI35_0@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfs 1, .LCPI35_0@toc@l(3) ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addi 1, 1, 32 @@ -1933,15 +1933,15 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI36_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI36_0@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfd 1, .LCPI36_0@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI36_1@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI36_1@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfd 1, .LCPI36_1@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload @@ -2013,30 +2013,30 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI37_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI37_0@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfs 1, .LCPI37_0@toc@l(3) ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI37_1@toc@ha ; PC64LE9-NEXT: fmr 31, 1 -; PC64LE9-NEXT: lfs 1, .LCPI37_1@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfs 1, .LCPI37_1@toc@l(3) ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI37_2@toc@ha ; PC64LE9-NEXT: fmr 30, 1 -; PC64LE9-NEXT: lfs 1, .LCPI37_2@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfs 1, .LCPI37_2@toc@l(3) ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI37_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI37_3@toc@l +; 
PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI37_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI37_3@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -2102,24 +2102,24 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI38_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI38_0@toc@l(3) ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI38_0@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI38_1@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI38_1@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfs 1, .LCPI38_1@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI38_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: xxmrghd 63, 0, 1 ; PC64LE9-NEXT: lfd 1, .LCPI38_2@toc@l(3) -; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 @@ -2196,31 +2196,31 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI39_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI39_0@toc@l(3) ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI39_0@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI39_1@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; 
PC64LE9-NEXT: lfd 1, .LCPI39_1@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfd 1, .LCPI39_1@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI39_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfd 1, .LCPI39_2@toc@l(3) -; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI39_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfd 1, .LCPI39_3@toc@l(3) ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: lfd 1, .LCPI39_3@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload @@ -2396,12 +2396,12 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 { ; PC64LE9-NEXT: bl sinf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI42_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI42_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI42_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI42_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -2464,8 +2464,8 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI43_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI43_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI43_0@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI43_1@toc@ha @@ -2550,8 +2550,8 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, 
.LCPI44_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI44_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI44_0@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI44_1@toc@ha @@ -2745,12 +2745,12 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 { ; PC64LE9-NEXT: bl cosf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI47_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI47_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI47_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI47_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -2813,8 +2813,8 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI48_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI48_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI48_0@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI48_1@toc@ha @@ -2899,8 +2899,8 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI49_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI49_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI49_0@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI49_1@toc@ha @@ -3094,12 +3094,12 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 { ; PC64LE9-NEXT: bl expf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI52_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI52_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; 
PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI52_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI52_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -3162,8 +3162,8 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI53_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI53_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI53_0@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI53_1@toc@ha @@ -3248,8 +3248,8 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI54_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI54_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI54_0@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI54_1@toc@ha @@ -3443,12 +3443,12 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 { ; PC64LE9-NEXT: bl exp2f ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI57_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI57_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI57_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI57_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -3511,8 +3511,8 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI58_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI58_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI58_0@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: 
addis 3, 2, .LCPI58_1@toc@ha @@ -3597,8 +3597,8 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI59_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI59_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI59_0@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI59_1@toc@ha @@ -3792,12 +3792,12 @@ define <3 x float> @constrained_vector_log_v3f32() #0 { ; PC64LE9-NEXT: bl logf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI62_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI62_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI62_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI62_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -3860,8 +3860,8 @@ define <3 x double> @constrained_vector_log_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI63_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI63_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI63_0@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI63_1@toc@ha @@ -3946,8 +3946,8 @@ define <4 x double> @constrained_vector_log_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI64_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI64_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI64_0@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI64_1@toc@ha @@ -4141,12 +4141,12 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 { ; PC64LE9-NEXT: bl log10f ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; 
PC64LE9-NEXT: addis 3, 2, .LCPI67_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI67_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI67_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI67_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -4209,8 +4209,8 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI68_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI68_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI68_0@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI68_1@toc@ha @@ -4295,8 +4295,8 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI69_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI69_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI69_0@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI69_1@toc@ha @@ -4490,12 +4490,12 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 { ; PC64LE9-NEXT: bl log2f ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI72_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI72_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI72_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI72_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -4558,8 +4558,8 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI73_0@toc@ha -; 
PC64LE9-NEXT: lfd 1, .LCPI73_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI73_0@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI73_1@toc@ha @@ -4644,8 +4644,8 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI74_0@toc@ha -; PC64LE9-NEXT: lfd 1, .LCPI74_0@toc@l(3) ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI74_0@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI74_1@toc@ha @@ -4983,12 +4983,12 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 { ; PC64LE9-NEXT: bl nearbyintf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI82_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI82_3@toc@l ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: addis 3, 2, .LCPI82_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI82_3@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 ; PC64LE9-NEXT: xxsldwi 36, 0, 0, 3 @@ -5221,19 +5221,19 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 { ; PC64LE9-NEXT: bl fmaxf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI87_4@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI87_4@toc@l(3) ; PC64LE9-NEXT: fmr 29, 1 ; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: lfs 2, .LCPI87_4@toc@l(3) ; PC64LE9-NEXT: bl fmaxf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI87_5@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI87_5@toc@l +; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: addis 3, 2, .LCPI87_5@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI87_5@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; 
PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -5294,11 +5294,11 @@ define <3 x double> @constrained_vector_max_v3f64() #0 { ; PC64LE9-NEXT: bl fmax ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI88_2@toc@ha +; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI88_2@toc@l ; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI88_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI88_3@toc@l -; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: xvmaxdp 2, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 2 @@ -5508,19 +5508,19 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 { ; PC64LE9-NEXT: bl fminf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI92_4@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI92_4@toc@l(3) ; PC64LE9-NEXT: fmr 29, 1 ; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: lfs 2, .LCPI92_4@toc@l(3) ; PC64LE9-NEXT: bl fminf ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: addis 3, 2, .LCPI92_5@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI92_5@toc@l +; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 29 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: xscvdpspn 0, 30 -; PC64LE9-NEXT: addis 3, 2, .LCPI92_5@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI92_5@toc@l -; PC64LE9-NEXT: lxvx 36, 0, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -5581,11 +5581,11 @@ define <3 x double> @constrained_vector_min_v3f64() #0 { ; PC64LE9-NEXT: bl fmin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI93_2@toc@ha +; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI93_2@toc@l ; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI93_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI93_3@toc@l -; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: xvmindp 2, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 2 diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll 
b/llvm/test/CodeGen/RISCV/branch-relaxation.ll index 56f0f27a06488..3d617bf0b26b4 100644 --- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll @@ -11,7 +11,7 @@ define void @relax_bcc(i1 %a) nounwind { ; CHECK-NEXT: j .LBB0_2 ; CHECK-NEXT: .LBB0_1: # %iftrue ; CHECK-NEXT: #APP -; CHECK-NEXT: .space 4096 +; CHECK-NEXT: .zero 4096 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: .LBB0_2: # %tail ; CHECK-NEXT: ret @@ -38,7 +38,7 @@ define i32 @relax_jal(i1 %a) nounwind { ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: #APP -; CHECK-NEXT: .space 1048576 +; CHECK-NEXT: .zero 1048576 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: addi a0, zero, 1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/cmp-bool.ll b/llvm/test/CodeGen/RISCV/cmp-bool.ll new file mode 100644 index 0000000000000..01c9c9cae32cf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/cmp-bool.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 < %s | FileCheck --check-prefix=RV32 %s +; RUN: llc -mtriple=riscv64 < %s | FileCheck --check-prefix=RV64 %s + +define void @bool_eq(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; RV32-LABEL: bool_eq: +; RV32: # %bb.0: # %entry +; RV32-NEXT: beq a0, a1, .LBB0_2 +; RV32-NEXT: # %bb.1: # %if.end +; RV32-NEXT: ret +; RV32-NEXT: .LBB0_2: # %if.then +; RV32-NEXT: jr a2 +; +; RV64-LABEL: bool_eq: +; RV64: # %bb.0: # %entry +; RV64-NEXT: beq a0, a1, .LBB0_2 +; RV64-NEXT: # %bb.1: # %if.end +; RV64-NEXT: ret +; RV64-NEXT: .LBB0_2: # %if.then +; RV64-NEXT: jr a2 +entry: + %0 = xor i1 %a, %b + br i1 %0, label %if.end, label %if.then + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} + +define void @bool_ne(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; RV32-LABEL: bool_ne: +; RV32: # %bb.0: # %entry +; RV32-NEXT: beq a0, a1, .LBB1_2 +; RV32-NEXT: # %bb.1: # %if.then +; RV32-NEXT: jr a2 +; RV32-NEXT: 
.LBB1_2: # %if.end +; RV32-NEXT: ret +; +; RV64-LABEL: bool_ne: +; RV64: # %bb.0: # %entry +; RV64-NEXT: beq a0, a1, .LBB1_2 +; RV64-NEXT: # %bb.1: # %if.then +; RV64-NEXT: jr a2 +; RV64-NEXT: .LBB1_2: # %if.end +; RV64-NEXT: ret +entry: + %cmp = xor i1 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll index 84774feccf12c..6c172a26f050b 100644 --- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll +++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll @@ -16,9 +16,9 @@ define i32 @lower_global(i32 %a) nounwind { ; ; RV32I-MEDIUM-LABEL: lower_global: ; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Ltmp0: +; RV32I-MEDIUM-NEXT: .LBB0_1: # Label of block must be emitted ; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(G) -; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Ltmp0) +; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.LBB0_1) ; RV32I-MEDIUM-NEXT: lw a0, 0(a0) ; RV32I-MEDIUM-NEXT: ret %1 = load volatile i32, i32* @G @@ -39,9 +39,9 @@ define void @lower_blockaddress() nounwind { ; ; RV32I-MEDIUM-LABEL: lower_blockaddress: ; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Ltmp1: +; RV32I-MEDIUM-NEXT: .LBB1_1: # Label of block must be emitted ; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(addr) -; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Ltmp1) +; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1) ; RV32I-MEDIUM-NEXT: addi a1, zero, 1 ; RV32I-MEDIUM-NEXT: sw a1, 0(a0) ; RV32I-MEDIUM-NEXT: ret @@ -82,16 +82,17 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind { ; RV32I-MEDIUM: # %bb.0: # %entry ; RV32I-MEDIUM-NEXT: addi sp, sp, -16 ; RV32I-MEDIUM-NEXT: sw ra, 12(sp) -; RV32I-MEDIUM-NEXT: .Ltmp2: -; RV32I-MEDIUM-NEXT: auipc a1, %pcrel_hi(.Ltmp3) -; RV32I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Ltmp2) +; RV32I-MEDIUM-NEXT: .LBB2_5: # %entry +; RV32I-MEDIUM-NEXT: # Label of block must be 
emitted +; RV32I-MEDIUM-NEXT: auipc a1, %pcrel_hi(.Ltmp0) +; RV32I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.LBB2_5) ; RV32I-MEDIUM-NEXT: addi a2, zero, 101 ; RV32I-MEDIUM-NEXT: sw a1, 8(sp) ; RV32I-MEDIUM-NEXT: blt a0, a2, .LBB2_3 ; RV32I-MEDIUM-NEXT: # %bb.1: # %if.then ; RV32I-MEDIUM-NEXT: lw a0, 8(sp) ; RV32I-MEDIUM-NEXT: jr a0 -; RV32I-MEDIUM-NEXT: .Ltmp3: # Block address taken +; RV32I-MEDIUM-NEXT: .Ltmp0: # Block address taken ; RV32I-MEDIUM-NEXT: .LBB2_2: # %return ; RV32I-MEDIUM-NEXT: addi a0, zero, 4 ; RV32I-MEDIUM-NEXT: j .LBB2_4 @@ -139,9 +140,9 @@ define float @lower_constantpool(float %a) nounwind { ; ; RV32I-MEDIUM-LABEL: lower_constantpool: ; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Ltmp4: +; RV32I-MEDIUM-NEXT: .LBB3_1: # Label of block must be emitted ; RV32I-MEDIUM-NEXT: auipc a1, %pcrel_hi(.LCPI3_0) -; RV32I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Ltmp4) +; RV32I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.LBB3_1) ; RV32I-MEDIUM-NEXT: flw ft0, 0(a1) ; RV32I-MEDIUM-NEXT: fmv.w.x ft1, a0 ; RV32I-MEDIUM-NEXT: fadd.s ft0, ft1, ft0 diff --git a/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll b/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll index 4d85e3ea006b8..f9ed4aed6ca32 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -no-integrated-as < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -no-integrated-as < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; These test that we can use both the architectural names (x*) and the ABI names diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll index 43f951e352a68..de5d9a5f22a83 100644 --- 
a/llvm/test/CodeGen/RISCV/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -no-integrated-as < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -no-integrated-as < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s @gi = external global i32 diff --git a/llvm/test/CodeGen/RISCV/large-stack.ll b/llvm/test/CodeGen/RISCV/large-stack.ll index 7acf0f4076e85..7cc6e83d7d85c 100644 --- a/llvm/test/CodeGen/RISCV/large-stack.ll +++ b/llvm/test/CodeGen/RISCV/large-stack.ll @@ -64,10 +64,12 @@ define void @test_emergency_spill_slot(i32 %a) { ; RV32I-FPELIM-NEXT: add a1, a2, a1 ; RV32I-FPELIM-NEXT: #APP ; RV32I-FPELIM-NEXT: nop +; RV32I-FPELIM-EMPTY: ; RV32I-FPELIM-NEXT: #NO_APP ; RV32I-FPELIM-NEXT: sw a0, 0(a1) ; RV32I-FPELIM-NEXT: #APP ; RV32I-FPELIM-NEXT: nop +; RV32I-FPELIM-EMPTY: ; RV32I-FPELIM-NEXT: #NO_APP ; RV32I-FPELIM-NEXT: lui a0, 97 ; RV32I-FPELIM-NEXT: addi a0, a0, 672 @@ -103,10 +105,12 @@ define void @test_emergency_spill_slot(i32 %a) { ; RV32I-WITHFP-NEXT: add a1, a2, a1 ; RV32I-WITHFP-NEXT: #APP ; RV32I-WITHFP-NEXT: nop +; RV32I-WITHFP-EMPTY: ; RV32I-WITHFP-NEXT: #NO_APP ; RV32I-WITHFP-NEXT: sw a0, 0(a1) ; RV32I-WITHFP-NEXT: #APP ; RV32I-WITHFP-NEXT: nop +; RV32I-WITHFP-EMPTY: ; RV32I-WITHFP-NEXT: #NO_APP ; RV32I-WITHFP-NEXT: lui a0, 97 ; RV32I-WITHFP-NEXT: addi a0, a0, 688 diff --git a/llvm/test/CodeGen/RISCV/mir-target-flags.ll b/llvm/test/CodeGen/RISCV/mir-target-flags.ll index b1bf935c4e3bf..f41fb77dbb00c 100644 --- a/llvm/test/CodeGen/RISCV/mir-target-flags.ll +++ b/llvm/test/CodeGen/RISCV/mir-target-flags.ll @@ -27,11 +27,11 @@ define i32 @caller(i32 %a) nounwind { ; RV32-SMALL-NEXT: target-flags(riscv-hi) @g_i ; RV32-SMALL-NEXT: 
target-flags(riscv-lo) @g_i ; RV32-SMALL: target-flags(riscv-tls-got-hi) @t_un -; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) +; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) %bb.1 ; RV32-SMALL: target-flags(riscv-tls-got-hi) @t_ld -; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) +; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) %bb.2 ; RV32-SMALL: target-flags(riscv-tls-got-hi) @t_ie -; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) +; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) %bb.3 ; RV32-SMALL: target-flags(riscv-tprel-hi) @t_le ; RV32-SMALL-NEXT: target-flags(riscv-tprel-add) @t_le ; RV32-SMALL-NEXT: target-flags(riscv-tprel-lo) @t_le @@ -39,17 +39,17 @@ define i32 @caller(i32 %a) nounwind { ; ; RV32-MED-LABEL: name: caller ; RV32-MED: target-flags(riscv-got-hi) @g_e -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) +; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) %bb.1 ; RV32-MED: target-flags(riscv-pcrel-hi) @g_i -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) +; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) %bb.2 ; RV32-MED: target-flags(riscv-tls-gd-hi) @t_un -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) -; RV32-MED: target-flags(riscv-plt) &__tls_get_addr +; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) %bb.3 +; RV32-MED-NEXT: target-flags(riscv-plt) &__tls_get_addr ; RV32-MED: target-flags(riscv-tls-gd-hi) @t_ld -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) -; RV32-MED: target-flags(riscv-plt) &__tls_get_addr +; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) %bb.4 +; RV32-MED-NEXT: target-flags(riscv-plt) &__tls_get_addr ; RV32-MED: target-flags(riscv-tls-got-hi) @t_ie -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) +; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) %bb.5 ; RV32-MED: target-flags(riscv-tprel-hi) @t_le ; RV32-MED-NEXT: target-flags(riscv-tprel-add) @t_le ; RV32-MED-NEXT: target-flags(riscv-tprel-lo) @t_le diff --git a/llvm/test/CodeGen/RISCV/pic-models.ll b/llvm/test/CodeGen/RISCV/pic-models.ll index 46e9cee57d79d..8d835ae99f406 100644 --- 
a/llvm/test/CodeGen/RISCV/pic-models.ll +++ b/llvm/test/CodeGen/RISCV/pic-models.ll @@ -26,9 +26,10 @@ define i32* @f1() nounwind { ; ; RV32-PIC-LABEL: f1: ; RV32-PIC: # %bb.0: # %entry -; RV32-PIC-NEXT: .Ltmp0: +; RV32-PIC-NEXT: .LBB0_1: # %entry +; RV32-PIC-NEXT: # Label of block must be emitted ; RV32-PIC-NEXT: auipc a0, %got_pcrel_hi(external_var) -; RV32-PIC-NEXT: lw a0, %pcrel_lo(.Ltmp0)(a0) +; RV32-PIC-NEXT: lw a0, %pcrel_lo(.LBB0_1)(a0) ; RV32-PIC-NEXT: ret ; ; RV64-STATIC-LABEL: f1: @@ -39,9 +40,10 @@ define i32* @f1() nounwind { ; ; RV64-PIC-LABEL: f1: ; RV64-PIC: # %bb.0: # %entry -; RV64-PIC-NEXT: .Ltmp0: +; RV64-PIC-NEXT: .LBB0_1: # %entry +; RV64-PIC-NEXT: # Label of block must be emitted ; RV64-PIC-NEXT: auipc a0, %got_pcrel_hi(external_var) -; RV64-PIC-NEXT: ld a0, %pcrel_lo(.Ltmp0)(a0) +; RV64-PIC-NEXT: ld a0, %pcrel_lo(.LBB0_1)(a0) ; RV64-PIC-NEXT: ret entry: ret i32* @external_var @@ -59,9 +61,10 @@ define i32* @f2() nounwind { ; ; RV32-PIC-LABEL: f2: ; RV32-PIC: # %bb.0: # %entry -; RV32-PIC-NEXT: .Ltmp1: +; RV32-PIC-NEXT: .LBB1_1: # %entry +; RV32-PIC-NEXT: # Label of block must be emitted ; RV32-PIC-NEXT: auipc a0, %pcrel_hi(internal_var) -; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Ltmp1) +; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1) ; RV32-PIC-NEXT: ret ; ; RV64-STATIC-LABEL: f2: @@ -72,9 +75,10 @@ define i32* @f2() nounwind { ; ; RV64-PIC-LABEL: f2: ; RV64-PIC: # %bb.0: # %entry -; RV64-PIC-NEXT: .Ltmp1: +; RV64-PIC-NEXT: .LBB1_1: # %entry +; RV64-PIC-NEXT: # Label of block must be emitted ; RV64-PIC-NEXT: auipc a0, %pcrel_hi(internal_var) -; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Ltmp1) +; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1) ; RV64-PIC-NEXT: ret entry: ret i32* @internal_var diff --git a/llvm/test/CodeGen/RISCV/rv32Zbb.ll b/llvm/test/CodeGen/RISCV/rv32Zbb.ll new file mode 100644 index 0000000000000..6933bad1f8cd2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32Zbb.ll @@ -0,0 +1,1218 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBB + +define i32 @slo_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: slo_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: slo_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: slo a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: slo_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: slo a0, a0, a1 +; RV32IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shl = shl i32 %neg, %b + %neg1 = xor i32 %shl, -1 + ret i32 %neg1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. 
+ +define i64 @slo_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: slo_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: bltz a3, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a2, zero +; RV32I-NEXT: sll a1, a0, a3 +; RV32I-NEXT: j .LBB1_3 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: addi a3, zero, 31 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: srli a4, a0, 1 +; RV32I-NEXT: srl a3, a4, a3 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sll a2, a0, a2 +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: not a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: slo_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, a2, -32 +; RV32IB-NEXT: not a0, a0 +; RV32IB-NEXT: bltz a3, .LBB1_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: mv a2, zero +; RV32IB-NEXT: sll a1, a0, a3 +; RV32IB-NEXT: j .LBB1_3 +; RV32IB-NEXT: .LBB1_2: +; RV32IB-NEXT: not a1, a1 +; RV32IB-NEXT: sll a1, a1, a2 +; RV32IB-NEXT: addi a3, zero, 31 +; RV32IB-NEXT: sub a3, a3, a2 +; RV32IB-NEXT: srli a4, a0, 1 +; RV32IB-NEXT: srl a3, a4, a3 +; RV32IB-NEXT: or a1, a1, a3 +; RV32IB-NEXT: sll a2, a0, a2 +; RV32IB-NEXT: .LBB1_3: +; RV32IB-NEXT: not a1, a1 +; RV32IB-NEXT: not a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: slo_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: addi a3, a2, -32 +; RV32IBB-NEXT: not a0, a0 +; RV32IBB-NEXT: bltz a3, .LBB1_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: mv a2, zero +; RV32IBB-NEXT: sll a1, a0, a3 +; RV32IBB-NEXT: j .LBB1_3 +; RV32IBB-NEXT: .LBB1_2: +; RV32IBB-NEXT: not a1, a1 +; RV32IBB-NEXT: sll a1, a1, a2 +; RV32IBB-NEXT: addi a3, zero, 31 +; RV32IBB-NEXT: sub a3, a3, a2 +; RV32IBB-NEXT: srli a4, a0, 1 +; RV32IBB-NEXT: srl a3, a4, a3 +; RV32IBB-NEXT: or a1, a1, a3 +; RV32IBB-NEXT: sll a2, a0, a2 +; RV32IBB-NEXT: .LBB1_3: +; RV32IBB-NEXT: not a1, a1 +; RV32IBB-NEXT: not a0, a2 +; RV32IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shl = shl i64 %neg, %b + %neg1 = xor i64 %shl, -1 + ret i64 
%neg1 +} + +define i32 @sro_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: sro_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sro_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sro a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sro_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sro a0, a0, a1 +; RV32IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shr = lshr i32 %neg, %b + %neg1 = xor i32 %shr, -1 + ret i32 %neg1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @sro_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: sro_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: bltz a3, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a2, zero +; RV32I-NEXT: srl a0, a1, a3 +; RV32I-NEXT: j .LBB3_3 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: addi a3, zero, 31 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: slli a4, a1, 1 +; RV32I-NEXT: sll a3, a4, a3 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: srl a2, a1, a2 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: not a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sro_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, a2, -32 +; RV32IB-NEXT: not a1, a1 +; RV32IB-NEXT: bltz a3, .LBB3_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: mv a2, zero +; RV32IB-NEXT: srl a0, a1, a3 +; RV32IB-NEXT: j .LBB3_3 +; RV32IB-NEXT: .LBB3_2: +; RV32IB-NEXT: not a0, a0 +; RV32IB-NEXT: srl a0, a0, a2 +; RV32IB-NEXT: addi a3, zero, 31 +; RV32IB-NEXT: sub a3, a3, a2 +; RV32IB-NEXT: slli a4, a1, 1 +; RV32IB-NEXT: sll a3, a4, a3 +; RV32IB-NEXT: or a0, a0, a3 +; RV32IB-NEXT: srl a2, a1, a2 +; RV32IB-NEXT: .LBB3_3: +; 
RV32IB-NEXT: not a0, a0 +; RV32IB-NEXT: not a1, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sro_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: addi a3, a2, -32 +; RV32IBB-NEXT: not a1, a1 +; RV32IBB-NEXT: bltz a3, .LBB3_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: mv a2, zero +; RV32IBB-NEXT: srl a0, a1, a3 +; RV32IBB-NEXT: j .LBB3_3 +; RV32IBB-NEXT: .LBB3_2: +; RV32IBB-NEXT: not a0, a0 +; RV32IBB-NEXT: srl a0, a0, a2 +; RV32IBB-NEXT: addi a3, zero, 31 +; RV32IBB-NEXT: sub a3, a3, a2 +; RV32IBB-NEXT: slli a4, a1, 1 +; RV32IBB-NEXT: sll a3, a4, a3 +; RV32IBB-NEXT: or a0, a0, a3 +; RV32IBB-NEXT: srl a2, a1, a2 +; RV32IBB-NEXT: .LBB3_3: +; RV32IBB-NEXT: not a0, a0 +; RV32IBB-NEXT: not a1, a2 +; RV32IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shr = lshr i64 %neg, %b + %neg1 = xor i64 %shr, -1 + ret i64 %neg1 +} + +define i32 @sloi_i32(i32 %a) nounwind { +; RV32I-LABEL: sloi_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: ori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sloi_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sloi a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sloi_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sloi a0, a0, 1 +; RV32IBB-NEXT: ret + %neg = shl i32 %a, 1 + %neg12 = or i32 %neg, 1 + ret i32 %neg12 +} + +define i64 @sloi_i64(i64 %a) nounwind { +; RV32I-LABEL: sloi_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a2, a0, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: ori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sloi_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a2, zero, 1 +; RV32IB-NEXT: fsl a1, a1, a2, a0 +; RV32IB-NEXT: sloi a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sloi_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: srli a2, a0, 31 +; RV32IBB-NEXT: slli a1, a1, 1 +; RV32IBB-NEXT: or a1, a1, a2 +; RV32IBB-NEXT: sloi a0, a0, 1 +; RV32IBB-NEXT: ret + %neg = shl i64 %a, 1 + %neg12 = or i64 %neg, 1 + ret i64 %neg12 +} + +define i32 @sroi_i32(i32 %a) nounwind { +; 
RV32I-LABEL: sroi_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sroi_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sroi a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sroi_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sroi a0, a0, 1 +; RV32IBB-NEXT: ret + %neg = lshr i32 %a, 1 + %neg12 = or i32 %neg, -2147483648 + ret i32 %neg12 +} + +define i64 @sroi_i64(i64 %a) nounwind { +; RV32I-LABEL: sroi_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a1, 31 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sroi_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a2, zero, 31 +; RV32IB-NEXT: fsl a0, a1, a2, a0 +; RV32IB-NEXT: sroi a1, a1, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sroi_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: slli a2, a1, 31 +; RV32IBB-NEXT: srli a0, a0, 1 +; RV32IBB-NEXT: or a0, a0, a2 +; RV32IBB-NEXT: sroi a1, a1, 1 +; RV32IBB-NEXT: ret + %neg = lshr i64 %a, 1 + %neg12 = or i64 %neg, -9223372036854775808 + ret i64 %neg12 +} + +declare i32 @llvm.ctlz.i32(i32, i1) + +define i32 @ctlz_i32(i32 %a) nounwind { +; RV32I-LABEL: ctlz_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: beqz a0, .LBB8_2 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, 
a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: j .LBB8_3 +; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: addi a0, zero, 32 +; RV32I-NEXT: .LBB8_3: # %cond.end +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctlz_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beqz a0, .LBB8_2 +; RV32IB-NEXT: # %bb.1: # %cond.false +; RV32IB-NEXT: clz a0, a0 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB8_2: +; RV32IB-NEXT: addi a0, zero, 32 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctlz_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beqz a0, .LBB8_2 +; RV32IBB-NEXT: # %bb.1: # %cond.false +; RV32IBB-NEXT: clz a0, a0 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB8_2: +; RV32IBB-NEXT: addi a0, zero, 32 +; RV32IBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.ctlz.i64(i64, i1) + +define i64 @ctlz_i64(i64 %a) nounwind { +; RV32I-LABEL: ctlz_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: sw s6, 0(sp) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s5, a2, 1365 +; 
RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi s1, a1, 819 +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi s6, a1, -241 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s0, a1, 257 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: srli a0, s4, 1 +; RV32I-NEXT: or a0, s4, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: bnez s3, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: .LBB9_3: +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: lw s6, 0(sp) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctlz_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: bnez a1, .LBB9_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: clz a0, a0 +; RV32IB-NEXT: addi a0, a0, 32 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB9_2: +; RV32IB-NEXT: clz a0, 
a1 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctlz_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: bnez a1, .LBB9_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: clz a0, a0 +; RV32IBB-NEXT: addi a0, a0, 32 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB9_2: +; RV32IBB-NEXT: clz a0, a1 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret + %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.cttz.i32(i32, i1) + +define i32 @cttz_i32(i32 %a) nounwind { +; RV32I-LABEL: cttz_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: beqz a0, .LBB10_2 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: j .LBB10_3 +; RV32I-NEXT: .LBB10_2: +; RV32I-NEXT: addi a0, zero, 32 +; RV32I-NEXT: .LBB10_3: # %cond.end +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cttz_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beqz a0, .LBB10_2 +; RV32IB-NEXT: # %bb.1: # %cond.false +; RV32IB-NEXT: ctz a0, a0 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB10_2: +; RV32IB-NEXT: addi a0, zero, 32 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: cttz_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beqz a0, .LBB10_2 +; RV32IBB-NEXT: # %bb.1: # %cond.false +; RV32IBB-NEXT: ctz a0, 
a0 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB10_2: +; RV32IBB-NEXT: addi a0, zero, 32 +; RV32IBB-NEXT: ret + %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.cttz.i64(i64, i1) + +define i64 @cttz_i64(i64 %a) nounwind { +; RV32I-LABEL: cttz_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: sw s6, 0(sp) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s5, a2, 1365 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi s0, a1, 819 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi s6, a1, -241 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s1, a1, 257 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: addi a0, s3, -1 +; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: bnez s4, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: j .LBB11_3 +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: .LBB11_3: +; 
RV32I-NEXT: mv a1, zero +; RV32I-NEXT: lw s6, 0(sp) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cttz_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: bnez a0, .LBB11_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: ctz a0, a1 +; RV32IB-NEXT: addi a0, a0, 32 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB11_2: +; RV32IB-NEXT: ctz a0, a0 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: cttz_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: bnez a0, .LBB11_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: ctz a0, a1 +; RV32IBB-NEXT: addi a0, a0, 32 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB11_2: +; RV32IBB-NEXT: ctz a0, a0 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret + %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.ctpop.i32(i32) + +define i32 @ctpop_i32(i32 %a) nounwind { +; RV32I-LABEL: ctpop_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctpop_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: pcnt a0, a0 +; RV32IB-NEXT: ret +; +; 
RV32IBB-LABEL: ctpop_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: pcnt a0, a0 +; RV32IBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.ctpop.i64(i64) + +define i64 @ctpop_i64(i64 %a) nounwind { +; RV32I-LABEL: ctpop_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s3, a2, 1365 +; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi s0, a1, 819 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi s4, a1, -241 +; RV32I-NEXT: and a0, a0, s4 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s1, a1, 257 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli a0, s2, 1 +; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: sub a0, s2, a0 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: and a0, a0, s4 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: add a0, a0, s5 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctpop_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: pcnt a1, a1 +; RV32IB-NEXT: pcnt a0, a0 +; RV32IB-NEXT: add a0, a0, a1 
+; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctpop_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: pcnt a1, a1 +; RV32IBB-NEXT: pcnt a0, a0 +; RV32IBB-NEXT: add a0, a0, a1 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %1 +} + +define i32 @sextb_i32(i32 %a) nounwind { +; RV32I-LABEL: sextb_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sextb_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.b a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sextb_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.b a0, a0 +; RV32IBB-NEXT: ret + %shl = shl i32 %a, 24 + %shr = ashr exact i32 %shl, 24 + ret i32 %shr +} + +define i64 @sextb_i64(i64 %a) nounwind { +; RV32I-LABEL: sextb_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a0, a1, 24 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sextb_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.b a2, a0 +; RV32IB-NEXT: slli a0, a0, 24 +; RV32IB-NEXT: srai a1, a0, 31 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sextb_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.b a2, a0 +; RV32IBB-NEXT: slli a0, a0, 24 +; RV32IBB-NEXT: srai a1, a0, 31 +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: ret + %shl = shl i64 %a, 56 + %shr = ashr exact i64 %shl, 56 + ret i64 %shr +} + +define i32 @sexth_i32(i32 %a) nounwind { +; RV32I-LABEL: sexth_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sexth_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.h a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sexth_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.h a0, a0 +; RV32IBB-NEXT: ret + %shl = shl i32 %a, 16 + %shr = ashr exact i32 %shl, 16 + ret i32 %shr +} + +define i64 @sexth_i64(i64 %a) nounwind { +; RV32I-LABEL: sexth_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 16 +; 
RV32I-NEXT: srai a0, a1, 16 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sexth_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.h a2, a0 +; RV32IB-NEXT: slli a0, a0, 16 +; RV32IB-NEXT: srai a1, a0, 31 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sexth_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.h a2, a0 +; RV32IBB-NEXT: slli a0, a0, 16 +; RV32IBB-NEXT: srai a1, a0, 31 +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: ret + %shl = shl i64 %a, 48 + %shr = ashr exact i64 %shl, 48 + ret i64 %shr +} + +define i32 @min_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: min_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: blt a0, a1, .LBB18_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB18_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: min_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: min a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: min_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: min a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp slt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. 
+ +define i64 @min_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: min_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB19_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a1, a3 +; RV32I-NEXT: beqz a4, .LBB19_3 +; RV32I-NEXT: j .LBB19_4 +; RV32I-NEXT: .LBB19_2: +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: bnez a4, .LBB19_4 +; RV32I-NEXT: .LBB19_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB19_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: min_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB19_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: slt a4, a1, a3 +; RV32IB-NEXT: beqz a4, .LBB19_3 +; RV32IB-NEXT: j .LBB19_4 +; RV32IB-NEXT: .LBB19_2: +; RV32IB-NEXT: sltu a4, a0, a2 +; RV32IB-NEXT: bnez a4, .LBB19_4 +; RV32IB-NEXT: .LBB19_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB19_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: min_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB19_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: slt a4, a1, a3 +; RV32IBB-NEXT: beqz a4, .LBB19_3 +; RV32IBB-NEXT: j .LBB19_4 +; RV32IBB-NEXT: .LBB19_2: +; RV32IBB-NEXT: sltu a4, a0, a2 +; RV32IBB-NEXT: bnez a4, .LBB19_4 +; RV32IBB-NEXT: .LBB19_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB19_4: +; RV32IBB-NEXT: ret + %cmp = icmp slt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define i32 @max_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: max_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: blt a1, a0, .LBB20_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB20_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: max_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: max a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: max_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: max a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp sgt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't 
have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @max_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: max_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB21_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a3, a1 +; RV32I-NEXT: beqz a4, .LBB21_3 +; RV32I-NEXT: j .LBB21_4 +; RV32I-NEXT: .LBB21_2: +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: bnez a4, .LBB21_4 +; RV32I-NEXT: .LBB21_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB21_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: max_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB21_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: slt a4, a3, a1 +; RV32IB-NEXT: beqz a4, .LBB21_3 +; RV32IB-NEXT: j .LBB21_4 +; RV32IB-NEXT: .LBB21_2: +; RV32IB-NEXT: sltu a4, a2, a0 +; RV32IB-NEXT: bnez a4, .LBB21_4 +; RV32IB-NEXT: .LBB21_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB21_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: max_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB21_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: slt a4, a3, a1 +; RV32IBB-NEXT: beqz a4, .LBB21_3 +; RV32IBB-NEXT: j .LBB21_4 +; RV32IBB-NEXT: .LBB21_2: +; RV32IBB-NEXT: sltu a4, a2, a0 +; RV32IBB-NEXT: bnez a4, .LBB21_4 +; RV32IBB-NEXT: .LBB21_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB21_4: +; RV32IBB-NEXT: ret + %cmp = icmp sgt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define i32 @minu_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: minu_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: bltu a0, a1, .LBB22_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB22_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: minu_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: minu a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: minu_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: minu 
a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp ult i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @minu_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: minu_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB23_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a1, a3 +; RV32I-NEXT: beqz a4, .LBB23_3 +; RV32I-NEXT: j .LBB23_4 +; RV32I-NEXT: .LBB23_2: +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: bnez a4, .LBB23_4 +; RV32I-NEXT: .LBB23_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB23_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: minu_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB23_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sltu a4, a1, a3 +; RV32IB-NEXT: beqz a4, .LBB23_3 +; RV32IB-NEXT: j .LBB23_4 +; RV32IB-NEXT: .LBB23_2: +; RV32IB-NEXT: sltu a4, a0, a2 +; RV32IB-NEXT: bnez a4, .LBB23_4 +; RV32IB-NEXT: .LBB23_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB23_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: minu_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB23_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: sltu a4, a1, a3 +; RV32IBB-NEXT: beqz a4, .LBB23_3 +; RV32IBB-NEXT: j .LBB23_4 +; RV32IBB-NEXT: .LBB23_2: +; RV32IBB-NEXT: sltu a4, a0, a2 +; RV32IBB-NEXT: bnez a4, .LBB23_4 +; RV32IBB-NEXT: .LBB23_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB23_4: +; RV32IBB-NEXT: ret + %cmp = icmp ult i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define i32 @maxu_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: maxu_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: bltu a1, a0, .LBB24_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; 
RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: maxu_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: maxu a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: maxu_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: maxu a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp ugt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @maxu_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: maxu_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB25_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a3, a1 +; RV32I-NEXT: beqz a4, .LBB25_3 +; RV32I-NEXT: j .LBB25_4 +; RV32I-NEXT: .LBB25_2: +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: bnez a4, .LBB25_4 +; RV32I-NEXT: .LBB25_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB25_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: maxu_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB25_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sltu a4, a3, a1 +; RV32IB-NEXT: beqz a4, .LBB25_3 +; RV32IB-NEXT: j .LBB25_4 +; RV32IB-NEXT: .LBB25_2: +; RV32IB-NEXT: sltu a4, a2, a0 +; RV32IB-NEXT: bnez a4, .LBB25_4 +; RV32IB-NEXT: .LBB25_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB25_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: maxu_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB25_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: sltu a4, a3, a1 +; RV32IBB-NEXT: beqz a4, .LBB25_3 +; RV32IBB-NEXT: j .LBB25_4 +; RV32IBB-NEXT: .LBB25_2: +; RV32IBB-NEXT: sltu a4, a2, a0 +; RV32IBB-NEXT: bnez a4, .LBB25_4 +; RV32IBB-NEXT: .LBB25_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB25_4: +; RV32IBB-NEXT: ret + %cmp = icmp ugt i64 %a, %b + %cond = select i1 %cmp, 
i64 %a, i64 %b + ret i64 %cond +} diff --git a/llvm/test/CodeGen/RISCV/rv32Zbbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll new file mode 100644 index 0000000000000..0e6288928f0cb --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll @@ -0,0 +1,892 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBP + +define i32 @andn_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: andn_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: andn_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andn a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: andn_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andn a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: andn_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andn a0, a0, a1 +; RV32IBP-NEXT: ret + %neg = xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 %and +} + +define i64 @andn_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: andn_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: andn_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andn a0, a0, a2 +; RV32IB-NEXT: andn a1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: andn_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andn a0, a0, a2 +; RV32IBB-NEXT: andn a1, a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: andn_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andn a0, a0, a2 +; RV32IBP-NEXT: andn a1, a1, a3 +; RV32IBP-NEXT: 
ret + %neg = xor i64 %b, -1 + %and = and i64 %neg, %a + ret i64 %and +} + +define i32 @orn_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: orn_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: orn_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orn a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: orn_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: orn a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: orn_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orn a0, a0, a1 +; RV32IBP-NEXT: ret + %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i64 @orn_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: orn_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: orn_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orn a0, a0, a2 +; RV32IB-NEXT: orn a1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: orn_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: orn a0, a0, a2 +; RV32IBB-NEXT: orn a1, a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: orn_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orn a0, a0, a2 +; RV32IBP-NEXT: orn a1, a1, a3 +; RV32IBP-NEXT: ret + %neg = xor i64 %b, -1 + %or = or i64 %neg, %a + ret i64 %or +} + +define i32 @xnor_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: xnor_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: xnor_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xnor a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: xnor_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: xnor a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: xnor_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xnor a0, a0, a1 +; RV32IBP-NEXT: ret + %neg = xor i32 %a, -1 + %xor = xor i32 %neg, %b + ret i32 %xor +} + +define i64 @xnor_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: xnor_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: xor a1, a1, a3 +; RV32I-NEXT: 
xor a0, a0, a2 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: xnor_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xnor a0, a0, a2 +; RV32IB-NEXT: xnor a1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: xnor_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: xnor a0, a0, a2 +; RV32IBB-NEXT: xnor a1, a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: xnor_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xnor a0, a0, a2 +; RV32IBP-NEXT: xnor a1, a1, a3 +; RV32IBP-NEXT: ret + %neg = xor i64 %a, -1 + %xor = xor i64 %neg, %b + ret i64 %xor +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define i32 @rol_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: rol_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: sll a2, a0, a1 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rol_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rol a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rol_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: rol a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rol_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rol a0, a0, a1 +; RV32IBP-NEXT: ret + %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b) + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. 
+ +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @rol_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: rol_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi t1, a3, -32 +; RV32I-NEXT: addi a6, zero, 31 +; RV32I-NEXT: bltz t1, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll a7, a0, t1 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sll a4, a1, a2 +; RV32I-NEXT: sub a3, a6, a3 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a3, a5, a3 +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: addi a3, a5, -32 +; RV32I-NEXT: bltz a3, .LBB7_7 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t0, zero +; RV32I-NEXT: bgez a3, .LBB7_8 +; RV32I-NEXT: .LBB7_5: +; RV32I-NEXT: srl a3, a0, a4 +; RV32I-NEXT: sub a4, a6, a5 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: or a4, a3, a1 +; RV32I-NEXT: or a1, a7, t0 +; RV32I-NEXT: bgez t1, .LBB7_9 +; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB7_7: +; RV32I-NEXT: srl t0, a1, a4 +; RV32I-NEXT: bltz a3, .LBB7_5 +; RV32I-NEXT: .LBB7_8: +; RV32I-NEXT: srl a4, a1, a3 +; RV32I-NEXT: or a1, a7, t0 +; RV32I-NEXT: bltz t1, .LBB7_6 +; RV32I-NEXT: .LBB7_9: +; RV32I-NEXT: or a0, zero, a4 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rol_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi a3, a2, 63 +; RV32IB-NEXT: addi t1, a3, -32 +; RV32IB-NEXT: addi a6, zero, 31 +; RV32IB-NEXT: bltz t1, .LBB7_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sll a7, a0, t1 +; RV32IB-NEXT: j .LBB7_3 +; RV32IB-NEXT: .LBB7_2: +; RV32IB-NEXT: sll a4, a1, a2 +; RV32IB-NEXT: sub a3, a6, a3 +; RV32IB-NEXT: srli a5, a0, 1 +; RV32IB-NEXT: srl a3, a5, a3 +; RV32IB-NEXT: or a7, a4, a3 +; RV32IB-NEXT: .LBB7_3: +; RV32IB-NEXT: neg a4, a2 +; RV32IB-NEXT: andi a5, a4, 63 +; RV32IB-NEXT: addi a3, a5, -32 +; RV32IB-NEXT: bltz a3, .LBB7_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t0, 
zero +; RV32IB-NEXT: bgez a3, .LBB7_8 +; RV32IB-NEXT: .LBB7_5: +; RV32IB-NEXT: srl a3, a0, a4 +; RV32IB-NEXT: sub a4, a6, a5 +; RV32IB-NEXT: slli a1, a1, 1 +; RV32IB-NEXT: sll a1, a1, a4 +; RV32IB-NEXT: or a4, a3, a1 +; RV32IB-NEXT: or a1, a7, t0 +; RV32IB-NEXT: bgez t1, .LBB7_9 +; RV32IB-NEXT: .LBB7_6: +; RV32IB-NEXT: sll a0, a0, a2 +; RV32IB-NEXT: or a0, a0, a4 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB7_7: +; RV32IB-NEXT: srl t0, a1, a4 +; RV32IB-NEXT: bltz a3, .LBB7_5 +; RV32IB-NEXT: .LBB7_8: +; RV32IB-NEXT: srl a4, a1, a3 +; RV32IB-NEXT: or a1, a7, t0 +; RV32IB-NEXT: bltz t1, .LBB7_6 +; RV32IB-NEXT: .LBB7_9: +; RV32IB-NEXT: or a0, zero, a4 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rol_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andi a3, a2, 63 +; RV32IBB-NEXT: addi t1, a3, -32 +; RV32IBB-NEXT: addi a6, zero, 31 +; RV32IBB-NEXT: bltz t1, .LBB7_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: sll a7, a0, t1 +; RV32IBB-NEXT: j .LBB7_3 +; RV32IBB-NEXT: .LBB7_2: +; RV32IBB-NEXT: sll a4, a1, a2 +; RV32IBB-NEXT: sub a3, a6, a3 +; RV32IBB-NEXT: srli a5, a0, 1 +; RV32IBB-NEXT: srl a3, a5, a3 +; RV32IBB-NEXT: or a7, a4, a3 +; RV32IBB-NEXT: .LBB7_3: +; RV32IBB-NEXT: neg a4, a2 +; RV32IBB-NEXT: andi a5, a4, 63 +; RV32IBB-NEXT: addi a3, a5, -32 +; RV32IBB-NEXT: bltz a3, .LBB7_7 +; RV32IBB-NEXT: # %bb.4: +; RV32IBB-NEXT: mv t0, zero +; RV32IBB-NEXT: bgez a3, .LBB7_8 +; RV32IBB-NEXT: .LBB7_5: +; RV32IBB-NEXT: srl a3, a0, a4 +; RV32IBB-NEXT: sub a4, a6, a5 +; RV32IBB-NEXT: slli a1, a1, 1 +; RV32IBB-NEXT: sll a1, a1, a4 +; RV32IBB-NEXT: or a4, a3, a1 +; RV32IBB-NEXT: or a1, a7, t0 +; RV32IBB-NEXT: bgez t1, .LBB7_9 +; RV32IBB-NEXT: .LBB7_6: +; RV32IBB-NEXT: sll a0, a0, a2 +; RV32IBB-NEXT: or a0, a0, a4 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB7_7: +; RV32IBB-NEXT: srl t0, a1, a4 +; RV32IBB-NEXT: bltz a3, .LBB7_5 +; RV32IBB-NEXT: .LBB7_8: +; RV32IBB-NEXT: srl a4, a1, a3 +; RV32IBB-NEXT: or a1, a7, t0 +; RV32IBB-NEXT: bltz t1, .LBB7_6 +; RV32IBB-NEXT: .LBB7_9: +; RV32IBB-NEXT: or a0, 
zero, a4 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rol_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andi a3, a2, 63 +; RV32IBP-NEXT: addi t1, a3, -32 +; RV32IBP-NEXT: addi a6, zero, 31 +; RV32IBP-NEXT: bltz t1, .LBB7_2 +; RV32IBP-NEXT: # %bb.1: +; RV32IBP-NEXT: sll a7, a0, t1 +; RV32IBP-NEXT: j .LBB7_3 +; RV32IBP-NEXT: .LBB7_2: +; RV32IBP-NEXT: sll a4, a1, a2 +; RV32IBP-NEXT: sub a3, a6, a3 +; RV32IBP-NEXT: srli a5, a0, 1 +; RV32IBP-NEXT: srl a3, a5, a3 +; RV32IBP-NEXT: or a7, a4, a3 +; RV32IBP-NEXT: .LBB7_3: +; RV32IBP-NEXT: neg a4, a2 +; RV32IBP-NEXT: andi a5, a4, 63 +; RV32IBP-NEXT: addi a3, a5, -32 +; RV32IBP-NEXT: bltz a3, .LBB7_7 +; RV32IBP-NEXT: # %bb.4: +; RV32IBP-NEXT: mv t0, zero +; RV32IBP-NEXT: bgez a3, .LBB7_8 +; RV32IBP-NEXT: .LBB7_5: +; RV32IBP-NEXT: srl a3, a0, a4 +; RV32IBP-NEXT: sub a4, a6, a5 +; RV32IBP-NEXT: slli a1, a1, 1 +; RV32IBP-NEXT: sll a1, a1, a4 +; RV32IBP-NEXT: or a4, a3, a1 +; RV32IBP-NEXT: or a1, a7, t0 +; RV32IBP-NEXT: bgez t1, .LBB7_9 +; RV32IBP-NEXT: .LBB7_6: +; RV32IBP-NEXT: sll a0, a0, a2 +; RV32IBP-NEXT: or a0, a0, a4 +; RV32IBP-NEXT: ret +; RV32IBP-NEXT: .LBB7_7: +; RV32IBP-NEXT: srl t0, a1, a4 +; RV32IBP-NEXT: bltz a3, .LBB7_5 +; RV32IBP-NEXT: .LBB7_8: +; RV32IBP-NEXT: srl a4, a1, a3 +; RV32IBP-NEXT: or a1, a7, t0 +; RV32IBP-NEXT: bltz t1, .LBB7_6 +; RV32IBP-NEXT: .LBB7_9: +; RV32IBP-NEXT: or a0, zero, a4 +; RV32IBP-NEXT: ret + %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define i32 @ror_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: ror_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srl a2, a0, a1 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ror_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: ror a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ror_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: ror a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: ror_i32: +; RV32IBP: # %bb.0: +; 
RV32IBP-NEXT: ror a0, a0, a1 +; RV32IBP-NEXT: ret + %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b) + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @ror_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: ror_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi t1, a3, -32 +; RV32I-NEXT: addi a6, zero, 31 +; RV32I-NEXT: bltz t1, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a7, a1, t1 +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: srl a4, a0, a2 +; RV32I-NEXT: sub a3, a6, a3 +; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: sll a3, a5, a3 +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: .LBB9_3: +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: addi a3, a5, -32 +; RV32I-NEXT: bltz a3, .LBB9_7 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t0, zero +; RV32I-NEXT: bgez a3, .LBB9_8 +; RV32I-NEXT: .LBB9_5: +; RV32I-NEXT: sll a3, a1, a4 +; RV32I-NEXT: sub a4, a6, a5 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: or a4, a3, a0 +; RV32I-NEXT: or a0, t0, a7 +; RV32I-NEXT: bgez t1, .LBB9_9 +; RV32I-NEXT: .LBB9_6: +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB9_7: +; RV32I-NEXT: sll t0, a0, a4 +; RV32I-NEXT: bltz a3, .LBB9_5 +; RV32I-NEXT: .LBB9_8: +; RV32I-NEXT: sll a4, a0, a3 +; RV32I-NEXT: or a0, t0, a7 +; RV32I-NEXT: bltz t1, .LBB9_6 +; RV32I-NEXT: .LBB9_9: +; RV32I-NEXT: or a1, a4, zero +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ror_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi a3, a2, 63 +; RV32IB-NEXT: addi t1, a3, -32 +; RV32IB-NEXT: addi a6, zero, 31 +; RV32IB-NEXT: bltz t1, .LBB9_2 +; RV32IB-NEXT: # %bb.1: +; 
RV32IB-NEXT: srl a7, a1, t1 +; RV32IB-NEXT: j .LBB9_3 +; RV32IB-NEXT: .LBB9_2: +; RV32IB-NEXT: srl a4, a0, a2 +; RV32IB-NEXT: sub a3, a6, a3 +; RV32IB-NEXT: slli a5, a1, 1 +; RV32IB-NEXT: sll a3, a5, a3 +; RV32IB-NEXT: or a7, a4, a3 +; RV32IB-NEXT: .LBB9_3: +; RV32IB-NEXT: neg a4, a2 +; RV32IB-NEXT: andi a5, a4, 63 +; RV32IB-NEXT: addi a3, a5, -32 +; RV32IB-NEXT: bltz a3, .LBB9_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t0, zero +; RV32IB-NEXT: bgez a3, .LBB9_8 +; RV32IB-NEXT: .LBB9_5: +; RV32IB-NEXT: sll a3, a1, a4 +; RV32IB-NEXT: sub a4, a6, a5 +; RV32IB-NEXT: srli a0, a0, 1 +; RV32IB-NEXT: srl a0, a0, a4 +; RV32IB-NEXT: or a4, a3, a0 +; RV32IB-NEXT: or a0, t0, a7 +; RV32IB-NEXT: bgez t1, .LBB9_9 +; RV32IB-NEXT: .LBB9_6: +; RV32IB-NEXT: srl a1, a1, a2 +; RV32IB-NEXT: or a1, a4, a1 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB9_7: +; RV32IB-NEXT: sll t0, a0, a4 +; RV32IB-NEXT: bltz a3, .LBB9_5 +; RV32IB-NEXT: .LBB9_8: +; RV32IB-NEXT: sll a4, a0, a3 +; RV32IB-NEXT: or a0, t0, a7 +; RV32IB-NEXT: bltz t1, .LBB9_6 +; RV32IB-NEXT: .LBB9_9: +; RV32IB-NEXT: or a1, a4, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ror_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andi a3, a2, 63 +; RV32IBB-NEXT: addi t1, a3, -32 +; RV32IBB-NEXT: addi a6, zero, 31 +; RV32IBB-NEXT: bltz t1, .LBB9_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: srl a7, a1, t1 +; RV32IBB-NEXT: j .LBB9_3 +; RV32IBB-NEXT: .LBB9_2: +; RV32IBB-NEXT: srl a4, a0, a2 +; RV32IBB-NEXT: sub a3, a6, a3 +; RV32IBB-NEXT: slli a5, a1, 1 +; RV32IBB-NEXT: sll a3, a5, a3 +; RV32IBB-NEXT: or a7, a4, a3 +; RV32IBB-NEXT: .LBB9_3: +; RV32IBB-NEXT: neg a4, a2 +; RV32IBB-NEXT: andi a5, a4, 63 +; RV32IBB-NEXT: addi a3, a5, -32 +; RV32IBB-NEXT: bltz a3, .LBB9_7 +; RV32IBB-NEXT: # %bb.4: +; RV32IBB-NEXT: mv t0, zero +; RV32IBB-NEXT: bgez a3, .LBB9_8 +; RV32IBB-NEXT: .LBB9_5: +; RV32IBB-NEXT: sll a3, a1, a4 +; RV32IBB-NEXT: sub a4, a6, a5 +; RV32IBB-NEXT: srli a0, a0, 1 +; RV32IBB-NEXT: srl a0, a0, a4 +; RV32IBB-NEXT: or a4, a3, a0 +; 
RV32IBB-NEXT: or a0, t0, a7 +; RV32IBB-NEXT: bgez t1, .LBB9_9 +; RV32IBB-NEXT: .LBB9_6: +; RV32IBB-NEXT: srl a1, a1, a2 +; RV32IBB-NEXT: or a1, a4, a1 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB9_7: +; RV32IBB-NEXT: sll t0, a0, a4 +; RV32IBB-NEXT: bltz a3, .LBB9_5 +; RV32IBB-NEXT: .LBB9_8: +; RV32IBB-NEXT: sll a4, a0, a3 +; RV32IBB-NEXT: or a0, t0, a7 +; RV32IBB-NEXT: bltz t1, .LBB9_6 +; RV32IBB-NEXT: .LBB9_9: +; RV32IBB-NEXT: or a1, a4, zero +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: ror_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andi a3, a2, 63 +; RV32IBP-NEXT: addi t1, a3, -32 +; RV32IBP-NEXT: addi a6, zero, 31 +; RV32IBP-NEXT: bltz t1, .LBB9_2 +; RV32IBP-NEXT: # %bb.1: +; RV32IBP-NEXT: srl a7, a1, t1 +; RV32IBP-NEXT: j .LBB9_3 +; RV32IBP-NEXT: .LBB9_2: +; RV32IBP-NEXT: srl a4, a0, a2 +; RV32IBP-NEXT: sub a3, a6, a3 +; RV32IBP-NEXT: slli a5, a1, 1 +; RV32IBP-NEXT: sll a3, a5, a3 +; RV32IBP-NEXT: or a7, a4, a3 +; RV32IBP-NEXT: .LBB9_3: +; RV32IBP-NEXT: neg a4, a2 +; RV32IBP-NEXT: andi a5, a4, 63 +; RV32IBP-NEXT: addi a3, a5, -32 +; RV32IBP-NEXT: bltz a3, .LBB9_7 +; RV32IBP-NEXT: # %bb.4: +; RV32IBP-NEXT: mv t0, zero +; RV32IBP-NEXT: bgez a3, .LBB9_8 +; RV32IBP-NEXT: .LBB9_5: +; RV32IBP-NEXT: sll a3, a1, a4 +; RV32IBP-NEXT: sub a4, a6, a5 +; RV32IBP-NEXT: srli a0, a0, 1 +; RV32IBP-NEXT: srl a0, a0, a4 +; RV32IBP-NEXT: or a4, a3, a0 +; RV32IBP-NEXT: or a0, t0, a7 +; RV32IBP-NEXT: bgez t1, .LBB9_9 +; RV32IBP-NEXT: .LBB9_6: +; RV32IBP-NEXT: srl a1, a1, a2 +; RV32IBP-NEXT: or a1, a4, a1 +; RV32IBP-NEXT: ret +; RV32IBP-NEXT: .LBB9_7: +; RV32IBP-NEXT: sll t0, a0, a4 +; RV32IBP-NEXT: bltz a3, .LBB9_5 +; RV32IBP-NEXT: .LBB9_8: +; RV32IBP-NEXT: sll a4, a0, a3 +; RV32IBP-NEXT: or a0, t0, a7 +; RV32IBP-NEXT: bltz t1, .LBB9_6 +; RV32IBP-NEXT: .LBB9_9: +; RV32IBP-NEXT: or a1, a4, zero +; RV32IBP-NEXT: ret + %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +define i32 @rori_i32(i32 %a) nounwind { +; RV32I-LABEL: rori_i32: +; RV32I: # %bb.0: +; 
RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rori_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rori a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rori_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: rori a0, a0, 1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rori_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rori a0, a0, 1 +; RV32IBP-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31) + ret i32 %1 +} + +define i64 @rori_i64(i64 %a) nounwind { +; RV32I-LABEL: rori_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a1, 31 +; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rori_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, zero, 31 +; RV32IB-NEXT: fsl a2, a1, a3, a0 +; RV32IB-NEXT: fsl a1, a0, a3, a1 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rori_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: slli a2, a1, 31 +; RV32IBB-NEXT: srli a3, a0, 1 +; RV32IBB-NEXT: or a2, a3, a2 +; RV32IBB-NEXT: srli a1, a1, 1 +; RV32IBB-NEXT: slli a0, a0, 31 +; RV32IBB-NEXT: or a1, a0, a1 +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rori_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: slli a2, a1, 31 +; RV32IBP-NEXT: srli a3, a0, 1 +; RV32IBP-NEXT: or a2, a3, a2 +; RV32IBP-NEXT: srli a1, a1, 1 +; RV32IBP-NEXT: slli a0, a0, 31 +; RV32IBP-NEXT: or a1, a0, a1 +; RV32IBP-NEXT: mv a0, a2 +; RV32IBP-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63) + ret i64 %1 +} + +define i32 @pack_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: pack_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: pack_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: pack 
a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: pack_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: pack a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: pack_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: pack a0, a0, a1 +; RV32IBP-NEXT: ret + %shl = and i32 %a, 65535 + %shl1 = shl i32 %b, 16 + %or = or i32 %shl1, %shl + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @pack_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: pack_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: pack_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: mv a1, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: pack_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: mv a1, a2 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: pack_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: mv a1, a2 +; RV32IBP-NEXT: ret + %shl = and i64 %a, 4294967295 + %shl1 = shl i64 %b, 32 + %or = or i64 %shl1, %shl + ret i64 %or +} + +define i32 @packu_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: packu_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: lui a2, 1048560 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packu_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: packu a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packu_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: packu a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packu_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: packu a0, a0, a1 +; RV32IBP-NEXT: ret + %shr = lshr i32 %a, 16 + %shr1 = and i32 %b, -65536 + %or = or i32 %shr1, %shr + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. 
+; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @packu_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: packu_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packu_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: mv a0, a1 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packu_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: mv a0, a1 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packu_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: mv a0, a1 +; RV32IBP-NEXT: mv a1, a3 +; RV32IBP-NEXT: ret + %shr = lshr i64 %a, 32 + %shr1 = and i64 %b, -4294967296 + %or = or i64 %shr1, %shr + ret i64 %or +} + +define i32 @packh_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: packh_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packh_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: packh a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packh_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: packh a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packh_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: packh a0, a0, a1 +; RV32IBP-NEXT: ret + %and = and i32 %a, 255 + %and1 = shl i32 %b, 8 + %shl = and i32 %and1, 65280 + %or = or i32 %shl, %and + ret i32 %or +} + +define i64 @packh_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: packh_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: slli a1, a2, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packh_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: packh a0, a0, a2 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; 
RV32IBB-LABEL: packh_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: packh a0, a0, a2 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packh_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: packh a0, a0, a2 +; RV32IBP-NEXT: mv a1, zero +; RV32IBP-NEXT: ret + %and = and i64 %a, 255 + %and1 = shl i64 %b, 8 + %shl = and i64 %and1, 65280 + %or = or i64 %shl, %and + ret i64 %or +} diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll new file mode 100644 index 0000000000000..8769ce77337c1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll @@ -0,0 +1,1245 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBP + +define i32 @gorc1_i32(i32 %a) nounwind { +; RV32I-LABEL: gorc1_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: lui a2, 699051 +; RV32I-NEXT: addi a2, a2, -1366 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc1_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc.p a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc1_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc.p a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 1 + %shl = and i32 %and, -1431655766 + %and1 = lshr i32 %a, 1 + %shr = and i32 %and1, 1431655765 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc1_i64(i64 %a) nounwind { +; RV32I-LABEL: gorc1_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a3, a1, 1 +; 
RV32I-NEXT: lui a4, 699051 +; RV32I-NEXT: addi a4, a4, -1366 +; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a4, a1, 1 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a5, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc1_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc.p a0, a0 +; RV32IB-NEXT: orc.p a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc1_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc.p a0, a0 +; RV32IBP-NEXT: orc.p a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 1 + %shl = and i64 %and, -6148914691236517206 + %and1 = lshr i64 %a, 1 + %shr = and i64 %and1, 6148914691236517205 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i32 @gorc2_i32(i32 %a) nounwind { +; RV32I-LABEL: gorc2_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: lui a2, 838861 +; RV32I-NEXT: addi a2, a2, -820 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: lui a3, 209715 +; RV32I-NEXT: addi a3, a3, 819 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc2_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc2.n a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc2_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc2.n a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 2 + %shl = and i32 %and, -858993460 + %and1 = lshr i32 %a, 2 + %shr = and i32 %and1, 858993459 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc2_i64(i64 %a) nounwind { +; RV32I-LABEL: gorc2_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 +; RV32I-NEXT: lui a4, 838861 +; RV32I-NEXT: addi a4, a4, -820 +; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: 
srli a4, a1, 2 +; RV32I-NEXT: srli a5, a0, 2 +; RV32I-NEXT: lui a3, 209715 +; RV32I-NEXT: addi a3, a3, 819 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a5, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc2_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc2.n a0, a0 +; RV32IB-NEXT: orc2.n a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc2_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc2.n a0, a0 +; RV32IBP-NEXT: orc2.n a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 2 + %shl = and i64 %and, -3689348814741910324 + %and1 = lshr i64 %a, 2 + %shr = and i64 %and1, 3689348814741910323 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i32 @gorc4_i32(i32 %a) nounwind { +; RV32I-LABEL: gorc4_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 4 +; RV32I-NEXT: lui a2, 986895 +; RV32I-NEXT: addi a2, a2, 240 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: lui a3, 61681 +; RV32I-NEXT: addi a3, a3, -241 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc4_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc4.b a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc4_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc4.b a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 4 + %shl = and i32 %and, -252645136 + %and1 = lshr i32 %a, 4 + %shr = and i32 %and1, 252645135 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc4_i64(i64 %a) nounwind { +; RV32I-LABEL: gorc4_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 4 +; RV32I-NEXT: slli a3, a1, 4 +; RV32I-NEXT: lui a4, 986895 +; RV32I-NEXT: addi a4, a4, 240 +; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: srli a5, a0, 4 +; RV32I-NEXT: lui a3, 61681 +; RV32I-NEXT: addi a3, a3, -241 +; RV32I-NEXT: and a5, a5, a3 
+; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a5, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc4_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc4.b a0, a0 +; RV32IB-NEXT: orc4.b a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc4_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc4.b a0, a0 +; RV32IBP-NEXT: orc4.b a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 4 + %shl = and i64 %and, -1085102592571150096 + %and1 = lshr i64 %a, 4 + %shr = and i64 %and1, 1085102592571150095 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i32 @gorc8_i32(i32 %a) nounwind { +; RV32I-LABEL: gorc8_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: lui a2, 1044496 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: addi a3, a3, 255 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc8_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc8.h a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc8_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc8.h a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 8 + %shl = and i32 %and, -16711936 + %and1 = lshr i32 %a, 8 + %shr = and i32 %and1, 16711935 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc8_i64(i64 %a) nounwind { +; RV32I-LABEL: gorc8_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: lui a4, 1044496 +; RV32I-NEXT: addi a4, a4, -256 +; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a4, a1, 8 +; RV32I-NEXT: srli a5, a0, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: addi a3, a3, 255 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a5, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, 
a1, a6 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc8_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc8.h a0, a0 +; RV32IB-NEXT: orc8.h a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc8_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc8.h a0, a0 +; RV32IBP-NEXT: orc8.h a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 8 + %shl = and i64 %and, -71777214294589696 + %and1 = lshr i64 %a, 8 + %shr = and i64 %and1, 71777214294589695 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i32 @gorc16_i32(i32 %a) nounwind { +; RV32I-LABEL: gorc16_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srli a2, a0, 16 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc16_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc16 a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc16_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc16 a0, a0 +; RV32IBP-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc16_i64(i64 %a) nounwind { +; RV32I-LABEL: gorc16_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a1, 16 +; RV32I-NEXT: slli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: gorc16_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc16 a0, a0 +; RV32IB-NEXT: orc16 a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc16_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orc16 a0, a0 +; RV32IBP-NEXT: orc16 a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 16 + %shl = and i64 %and, -281470681808896 + %and1 = lshr i64 %a, 16 + %shr = and i64 %and1, 281470681808895 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i32 @grev1_i32(i32 %a) nounwind { +; RV32I-LABEL: grev1_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: lui 
a2, 699051 +; RV32I-NEXT: addi a2, a2, -1366 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev1_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev.p a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev1_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev.p a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 1 + %shl = and i32 %and, -1431655766 + %and1 = lshr i32 %a, 1 + %shr = and i32 %and1, 1431655765 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev1_i64(i64 %a) nounwind { +; RV32I-LABEL: grev1_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a3, a1, 1 +; RV32I-NEXT: lui a4, 699051 +; RV32I-NEXT: addi a4, a4, -1366 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: lui a4, 349525 +; RV32I-NEXT: addi a4, a4, 1365 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev1_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev.p a0, a0 +; RV32IB-NEXT: rev.p a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev1_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev.p a0, a0 +; RV32IBP-NEXT: rev.p a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 1 + %shl = and i64 %and, -6148914691236517206 + %and1 = lshr i64 %a, 1 + %shr = and i64 %and1, 6148914691236517205 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i32 @grev2_i32(i32 %a) nounwind { +; RV32I-LABEL: grev2_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: lui a2, 838861 +; RV32I-NEXT: addi a2, a2, -820 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: 
grev2_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev2.n a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev2_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev2.n a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 2 + %shl = and i32 %and, -858993460 + %and1 = lshr i32 %a, 2 + %shr = and i32 %and1, 858993459 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev2_i64(i64 %a) nounwind { +; RV32I-LABEL: grev2_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 +; RV32I-NEXT: lui a4, 838861 +; RV32I-NEXT: addi a4, a4, -820 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: lui a4, 209715 +; RV32I-NEXT: addi a4, a4, 819 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev2_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev2.n a0, a0 +; RV32IB-NEXT: rev2.n a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev2_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev2.n a0, a0 +; RV32IBP-NEXT: rev2.n a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 2 + %shl = and i64 %and, -3689348814741910324 + %and1 = lshr i64 %a, 2 + %shr = and i64 %and1, 3689348814741910323 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i32 @grev4_i32(i32 %a) nounwind { +; RV32I-LABEL: grev4_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 4 +; RV32I-NEXT: lui a2, 986895 +; RV32I-NEXT: addi a2, a2, 240 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a0, a0, 4 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev4_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev4.b a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev4_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev4.b a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 4 + %shl = and i32 %and, -252645136 + %and1 = lshr i32 %a, 4 + %shr 
= and i32 %and1, 252645135 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev4_i64(i64 %a) nounwind { +; RV32I-LABEL: grev4_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 4 +; RV32I-NEXT: slli a3, a1, 4 +; RV32I-NEXT: lui a4, 986895 +; RV32I-NEXT: addi a4, a4, 240 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a0, a0, 4 +; RV32I-NEXT: srli a1, a1, 4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev4_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev4.b a0, a0 +; RV32IB-NEXT: rev4.b a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev4_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev4.b a0, a0 +; RV32IBP-NEXT: rev4.b a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 4 + %shl = and i64 %and, -1085102592571150096 + %and1 = lshr i64 %a, 4 + %shr = and i64 %and1, 1085102592571150095 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i32 @grev8_i32(i32 %a) nounwind { +; RV32I-LABEL: grev8_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: lui a2, 1044496 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: lui a2, 4080 +; RV32I-NEXT: addi a2, a2, 255 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev8_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev8.h a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev8_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev8.h a0, a0 +; RV32IBP-NEXT: ret + %and = shl i32 %a, 8 + %shl = and i32 %and, -16711936 + %and1 = lshr i32 %a, 8 + %shr = and i32 %and1, 16711935 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev8_i64(i64 %a) nounwind { +; RV32I-LABEL: grev8_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: lui a4, 1044496 +; RV32I-NEXT: addi a4, 
a4, -256 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: lui a4, 4080 +; RV32I-NEXT: addi a4, a4, 255 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev8_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev8.h a0, a0 +; RV32IB-NEXT: rev8.h a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev8_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev8.h a0, a0 +; RV32IBP-NEXT: rev8.h a1, a1 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 8 + %shl = and i64 %and, -71777214294589696 + %and1 = lshr i64 %a, 8 + %shr = and i64 %and1, 71777214294589695 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i32 @grev16_i32(i32 %a) nounwind { +; RV32I-LABEL: grev16_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev16_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rori a0, a0, 16 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev16_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rori a0, a0, 16 +; RV32IBP-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev16_i64(i64 %a) nounwind { +; RV32I-LABEL: grev16_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a1, 16 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: grev16_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rori a0, a0, 16 +; RV32IB-NEXT: rori a1, a1, 16 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev16_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rori a0, a0, 16 +; RV32IBP-NEXT: rori a1, a1, 16 +; RV32IBP-NEXT: ret + %and = shl i64 %a, 16 + %shl = and i64 %and, -281470681808896 + %and1 = lshr i64 %a, 16 + %shr = and i64 %and1, 281470681808895 + %or = or i64 %shl, %shr 
+ ret i64 %or +} + +declare i32 @llvm.bswap.i32(i32) + +define i32 @bswap_i32(i32 %a) nounwind { +; RV32I-LABEL: bswap_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: bswap_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev8 a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: bswap_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev8 a0, a0 +; RV32IBP-NEXT: ret + %1 = tail call i32 @llvm.bswap.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.bswap.i64(i64) + +define i64 @bswap_i64(i64 %a) { +; RV32I-LABEL: bswap_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a2, a1, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: lui a5, 4080 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a2, a1, a2 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: bswap_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev8 a2, a1 +; RV32IB-NEXT: rev8 a1, a0 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: bswap_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev8 a2, a1 +; RV32IBP-NEXT: rev8 a1, a0 +; RV32IBP-NEXT: mv a0, a2 +; RV32IBP-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %1 +} + +declare i32 @llvm.bitreverse.i32(i32) + +define i32 
@bitreverse_i32(i32 %a) nounwind { +; RV32I-LABEL: bitreverse_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: lui a2, 986895 +; RV32I-NEXT: addi a2, a2, 240 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a0, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: lui a2, 838861 +; RV32I-NEXT: addi a2, a2, -820 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: lui a2, 699051 +; RV32I-NEXT: addi a2, a2, -1366 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: bitreverse_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: bitreverse_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev a0, a0 +; RV32IBP-NEXT: ret + %1 = tail call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.bitreverse.i64(i64) + +define i64 @bitreverse_i64(i64 %a) nounwind { +; RV32I-LABEL: bitreverse_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a2, a1, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi t0, a3, -256 +; RV32I-NEXT: and a2, a2, t0 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: lui a6, 4080 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: slli a1, a1, 24 
+; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi t1, a2, -241 +; RV32I-NEXT: and a2, a1, t1 +; RV32I-NEXT: slli a2, a2, 4 +; RV32I-NEXT: lui a5, 986895 +; RV32I-NEXT: addi t2, a5, 240 +; RV32I-NEXT: and a1, a1, t2 +; RV32I-NEXT: srli a1, a1, 4 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi t3, a2, 819 +; RV32I-NEXT: and a3, a1, t3 +; RV32I-NEXT: slli a3, a3, 2 +; RV32I-NEXT: lui a4, 838861 +; RV32I-NEXT: addi a4, a4, -820 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a5, a1, a3 +; RV32I-NEXT: slli a5, a5, 1 +; RV32I-NEXT: lui a2, 699051 +; RV32I-NEXT: addi a2, a2, -1366 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: or a7, a1, a5 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: and a1, a1, t0 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: and a1, a0, t1 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: and a0, a0, t2 +; RV32I-NEXT: srli a0, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: and a1, a0, t3 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: and a1, a0, a3 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a7 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: bitreverse_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev a2, a1 +; RV32IB-NEXT: rev a1, a0 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: bitreverse_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rev a2, a1 +; RV32IBP-NEXT: rev a1, a0 +; RV32IBP-NEXT: mv a0, a2 +; RV32IBP-NEXT: ret + %1 = call i64 @llvm.bitreverse.i64(i64 
%a) + ret i64 %1 +} + +define i32 @shfl1_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: shfl1_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 629146 +; RV32I-NEXT: addi a1, a1, -1639 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: lui a3, 279620 +; RV32I-NEXT: addi a3, a3, 1092 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: lui a2, 139810 +; RV32I-NEXT: addi a2, a2, 546 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl1_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip.n a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl1_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip.n a0, a0 +; RV32IBP-NEXT: ret + %and = and i32 %a, -1717986919 + %shl = shl i32 %a, 1 + %and1 = and i32 %shl, 1145324612 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 1 + %and2 = and i32 %shr, 572662306 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i64 @shfl1_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: shfl1_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 629146 +; RV32I-NEXT: addi a2, a2, -1639 +; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: slli a4, a1, 1 +; RV32I-NEXT: slli a5, a0, 1 +; RV32I-NEXT: lui a3, 279620 +; RV32I-NEXT: addi a3, a3, 1092 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: lui a4, 139810 +; RV32I-NEXT: addi a4, a4, 546 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl1_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip.n a0, a0 +; RV32IB-NEXT: zip.n a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl1_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip.n a0, a0 +; RV32IBP-NEXT: zip.n a1, a1 +; RV32IBP-NEXT: ret + %and = and i64 %a, -7378697629483820647 + %shl = shl 
i64 %a, 1 + %and1 = and i64 %shl, 4919131752989213764 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 1 + %and2 = and i64 %shr, 2459565876494606882 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i32 @shfl2_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: shfl2_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 801852 +; RV32I-NEXT: addi a1, a1, 963 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: lui a3, 197379 +; RV32I-NEXT: addi a3, a3, 48 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: lui a2, 49345 +; RV32I-NEXT: addi a2, a2, -1012 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl2_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip2.b a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl2_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip2.b a0, a0 +; RV32IBP-NEXT: ret + %and = and i32 %a, -1010580541 + %shl = shl i32 %a, 2 + %and1 = and i32 %shl, 808464432 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 2 + %and2 = and i32 %shr, 202116108 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i64 @shfl2_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: shfl2_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 801852 +; RV32I-NEXT: addi a2, a2, 963 +; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: slli a4, a1, 2 +; RV32I-NEXT: slli a5, a0, 2 +; RV32I-NEXT: lui a3, 197379 +; RV32I-NEXT: addi a3, a3, 48 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: lui a4, 49345 +; RV32I-NEXT: addi a4, a4, -1012 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl2_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip2.b a0, a0 +; RV32IB-NEXT: zip2.b a1, a1 +; RV32IB-NEXT: ret +; +; 
RV32IBP-LABEL: shfl2_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip2.b a0, a0 +; RV32IBP-NEXT: zip2.b a1, a1 +; RV32IBP-NEXT: ret + %and = and i64 %a, -4340410370284600381 + %shl = shl i64 %a, 2 + %and1 = and i64 %shl, 3472328296227680304 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 2 + %and2 = and i64 %shr, 868082074056920076 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i32 @shfl4_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: shfl4_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 983295 +; RV32I-NEXT: addi a1, a1, 15 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a2, a0, 4 +; RV32I-NEXT: lui a3, 61441 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a0, a0, 4 +; RV32I-NEXT: lui a2, 3840 +; RV32I-NEXT: addi a2, a2, 240 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl4_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip4.h a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl4_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip4.h a0, a0 +; RV32IBP-NEXT: ret + %and = and i32 %a, -267390961 + %shl = shl i32 %a, 4 + %and1 = and i32 %shl, 251662080 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 4 + %and2 = and i32 %shr, 15728880 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i64 @shfl4_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: shfl4_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 983295 +; RV32I-NEXT: addi a2, a2, 15 +; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: slli a4, a1, 4 +; RV32I-NEXT: slli a5, a0, 4 +; RV32I-NEXT: lui a3, 61441 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srli a0, a0, 4 +; RV32I-NEXT: srli a1, a1, 4 +; RV32I-NEXT: lui a4, 3840 +; RV32I-NEXT: addi a4, a4, 240 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, 
a2, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl4_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip4.h a0, a0 +; RV32IB-NEXT: zip4.h a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl4_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip4.h a0, a0 +; RV32IBP-NEXT: zip4.h a1, a1 +; RV32IBP-NEXT: ret + %and = and i64 %a, -1148435428713435121 + %shl = shl i64 %a, 4 + %and1 = and i64 %shl, 1080880403494997760 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 4 + %and2 = and i64 %shr, 67555025218437360 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i32 @shfl8_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: shfl8_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 1044480 +; RV32I-NEXT: addi a1, a1, 255 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl8_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip8 a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl8_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip8 a0, a0 +; RV32IBP-NEXT: ret + %and = and i32 %a, -16776961 + %shl = shl i32 %a, 8 + %and1 = and i32 %shl, 16711680 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 8 + %and2 = and i32 %shr, 65280 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i64 @shfl8_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: shfl8_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 1044480 +; RV32I-NEXT: addi a2, a2, 255 +; RV32I-NEXT: and a3, a0, a2 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: lui a6, 4080 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a2, a4, a2 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: lui a4, 16 +; RV32I-NEXT: addi a4, a4, -256 +; RV32I-NEXT: and a1, 
a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: shfl8_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip8 a0, a0 +; RV32IB-NEXT: zip8 a1, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl8_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: zip8 a0, a0 +; RV32IBP-NEXT: zip8 a1, a1 +; RV32IBP-NEXT: ret + %and = and i64 %a, -72056494543077121 + %shl = shl i64 %a, 8 + %and1 = and i64 %shl, 71776119077928960 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 8 + %and2 = and i64 %shr, 280375465148160 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} diff --git a/llvm/test/CodeGen/RISCV/rv32Zbs.ll b/llvm/test/CodeGen/RISCV/rv32Zbs.ll new file mode 100644 index 0000000000000..16da34e49c663 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32Zbs.ll @@ -0,0 +1,361 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbs -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBS + +define i32 @sbclr_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: sbclr_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, zero, 1 +; RV32I-NEXT: sll a1, a2, a1 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbclr_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sbclr a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbclr_i32: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: sbclr a0, a0, a1 +; RV32IBS-NEXT: ret + %and = and i32 %b, 31 + %shl = shl nuw i32 1, %and + %neg = xor i32 %shl, -1 + %and1 = and i32 %neg, %a + ret i32 %and1 +} + +define i64 @sbclr_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: sbclr_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi a4, a3, -32 +; 
RV32I-NEXT: addi a3, zero, 1 +; RV32I-NEXT: bltz a4, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a2, zero +; RV32I-NEXT: sll a4, a3, a4 +; RV32I-NEXT: j .LBB1_3 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: mv a4, zero +; RV32I-NEXT: sll a2, a3, a2 +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: not a3, a4 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbclr_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi a3, a2, 63 +; RV32IB-NEXT: addi a4, a3, -32 +; RV32IB-NEXT: addi a3, zero, 1 +; RV32IB-NEXT: bltz a4, .LBB1_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: mv a2, zero +; RV32IB-NEXT: sll a4, a3, a4 +; RV32IB-NEXT: j .LBB1_3 +; RV32IB-NEXT: .LBB1_2: +; RV32IB-NEXT: mv a4, zero +; RV32IB-NEXT: sll a2, a3, a2 +; RV32IB-NEXT: .LBB1_3: +; RV32IB-NEXT: andn a0, a0, a2 +; RV32IB-NEXT: andn a1, a1, a4 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbclr_i64: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: andi a3, a2, 63 +; RV32IBS-NEXT: addi a4, a3, -32 +; RV32IBS-NEXT: addi a3, zero, 1 +; RV32IBS-NEXT: bltz a4, .LBB1_2 +; RV32IBS-NEXT: # %bb.1: +; RV32IBS-NEXT: mv a2, zero +; RV32IBS-NEXT: sll a4, a3, a4 +; RV32IBS-NEXT: j .LBB1_3 +; RV32IBS-NEXT: .LBB1_2: +; RV32IBS-NEXT: mv a4, zero +; RV32IBS-NEXT: sll a2, a3, a2 +; RV32IBS-NEXT: .LBB1_3: +; RV32IBS-NEXT: not a3, a4 +; RV32IBS-NEXT: not a2, a2 +; RV32IBS-NEXT: and a0, a2, a0 +; RV32IBS-NEXT: and a1, a3, a1 +; RV32IBS-NEXT: ret + %and = and i64 %b, 63 + %shl = shl nuw i64 1, %and + %neg = xor i64 %shl, -1 + %and1 = and i64 %neg, %a + ret i64 %and1 +} + +define i32 @sbset_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: sbset_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, zero, 1 +; RV32I-NEXT: sll a1, a2, a1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbset_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sbset a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbset_i32: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: sbset a0, a0, a1 +; RV32IBS-NEXT: ret + %and = 
and i32 %b, 31 + %shl = shl nuw i32 1, %and + %or = or i32 %shl, %a + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @sbset_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: sbset_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a3, zero, 1 +; RV32I-NEXT: sll a2, a3, a2 +; RV32I-NEXT: srai a3, a2, 31 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbset_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, zero, 1 +; RV32IB-NEXT: sll a2, a3, a2 +; RV32IB-NEXT: srai a3, a2, 31 +; RV32IB-NEXT: or a0, a2, a0 +; RV32IB-NEXT: or a1, a3, a1 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbset_i64: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: addi a3, zero, 1 +; RV32IBS-NEXT: sll a2, a3, a2 +; RV32IBS-NEXT: srai a3, a2, 31 +; RV32IBS-NEXT: or a0, a2, a0 +; RV32IBS-NEXT: or a1, a3, a1 +; RV32IBS-NEXT: ret + %1 = trunc i64 %b to i32 + %conv = and i32 %1, 63 + %shl = shl nuw i32 1, %conv + %conv1 = sext i32 %shl to i64 + %or = or i64 %conv1, %a + ret i64 %or +} + +define i32 @sbinv_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: sbinv_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, zero, 1 +; RV32I-NEXT: sll a1, a2, a1 +; RV32I-NEXT: xor a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbinv_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sbinv a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbinv_i32: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: sbinv a0, a0, a1 +; RV32IBS-NEXT: ret + %and = and i32 %b, 31 + %shl = shl nuw i32 1, %and + %xor = xor i32 %shl, %a + ret i32 %xor +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. 
+; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @sbinv_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: sbinv_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a3, zero, 1 +; RV32I-NEXT: sll a2, a3, a2 +; RV32I-NEXT: srai a3, a2, 31 +; RV32I-NEXT: xor a0, a2, a0 +; RV32I-NEXT: xor a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbinv_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, zero, 1 +; RV32IB-NEXT: sll a2, a3, a2 +; RV32IB-NEXT: srai a3, a2, 31 +; RV32IB-NEXT: xor a0, a2, a0 +; RV32IB-NEXT: xor a1, a3, a1 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbinv_i64: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: addi a3, zero, 1 +; RV32IBS-NEXT: sll a2, a3, a2 +; RV32IBS-NEXT: srai a3, a2, 31 +; RV32IBS-NEXT: xor a0, a2, a0 +; RV32IBS-NEXT: xor a1, a3, a1 +; RV32IBS-NEXT: ret + %1 = trunc i64 %b to i32 + %conv = and i32 %1, 63 + %shl = shl nuw i32 1, %conv + %conv1 = sext i32 %shl to i64 + %xor = xor i64 %conv1, %a + ret i64 %xor +} + +define i32 @sbext_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: sbext_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbext_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sbext a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbext_i32: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: sbext a0, a0, a1 +; RV32IBS-NEXT: ret + %and = and i32 %b, 31 + %shr = lshr i32 %a, %and + %and1 = and i32 %shr, 1 + ret i32 %and1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. 
+ +define i64 @sbext_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: sbext_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: bltz a4, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a0, a1, a4 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: addi a2, zero, 31 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbext_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi a3, a2, 63 +; RV32IB-NEXT: addi a4, a3, -32 +; RV32IB-NEXT: bltz a4, .LBB7_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: srl a0, a1, a4 +; RV32IB-NEXT: j .LBB7_3 +; RV32IB-NEXT: .LBB7_2: +; RV32IB-NEXT: srl a0, a0, a2 +; RV32IB-NEXT: addi a2, zero, 31 +; RV32IB-NEXT: sub a2, a2, a3 +; RV32IB-NEXT: slli a1, a1, 1 +; RV32IB-NEXT: sll a1, a1, a2 +; RV32IB-NEXT: or a0, a0, a1 +; RV32IB-NEXT: .LBB7_3: +; RV32IB-NEXT: andi a0, a0, 1 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbext_i64: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: andi a3, a2, 63 +; RV32IBS-NEXT: addi a4, a3, -32 +; RV32IBS-NEXT: bltz a4, .LBB7_2 +; RV32IBS-NEXT: # %bb.1: +; RV32IBS-NEXT: srl a0, a1, a4 +; RV32IBS-NEXT: j .LBB7_3 +; RV32IBS-NEXT: .LBB7_2: +; RV32IBS-NEXT: srl a0, a0, a2 +; RV32IBS-NEXT: addi a2, zero, 31 +; RV32IBS-NEXT: sub a2, a2, a3 +; RV32IBS-NEXT: slli a1, a1, 1 +; RV32IBS-NEXT: sll a1, a1, a2 +; RV32IBS-NEXT: or a0, a0, a1 +; RV32IBS-NEXT: .LBB7_3: +; RV32IBS-NEXT: andi a0, a0, 1 +; RV32IBS-NEXT: mv a1, zero +; RV32IBS-NEXT: ret + %conv = and i64 %b, 63 + %shr = lshr i64 %a, %conv + %and1 = and i64 %shr, 1 + ret i64 %and1 +} + +define i32 @sbexti_i32(i32 %a) nounwind { +; RV32I-LABEL: sbexti_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 5 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbexti_i32: +; 
RV32IB: # %bb.0: +; RV32IB-NEXT: sbexti a0, a0, 5 +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbexti_i32: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: sbexti a0, a0, 5 +; RV32IBS-NEXT: ret + %shr = lshr i32 %a, 5 + %and = and i32 %shr, 1 + ret i32 %and +} + +define i64 @sbexti_i64(i64 %a) nounwind { +; RV32I-LABEL: sbexti_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 5 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sbexti_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sbexti a0, a0, 5 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBS-LABEL: sbexti_i64: +; RV32IBS: # %bb.0: +; RV32IBS-NEXT: sbexti a0, a0, 5 +; RV32IBS-NEXT: mv a1, zero +; RV32IBS-NEXT: ret + %shr = lshr i64 %a, 5 + %and = and i64 %shr, 1 + ret i64 %and +} diff --git a/llvm/test/CodeGen/RISCV/rv32Zbt.ll b/llvm/test/CodeGen/RISCV/rv32Zbt.ll new file mode 100644 index 0000000000000..54b5b79778f42 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32Zbt.ll @@ -0,0 +1,569 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBT + +define i32 @cmix_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: cmix_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmix_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: cmix a0, a1, a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmix_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: cmix a0, a1, a0, a2 +; RV32IBT-NEXT: ret + %and = and i32 %b, %a + %neg = xor i32 %b, -1 + %and1 = and i32 %neg, %c + %or = or i32 %and1, %and + ret i32 %or 
+} + +define i64 @cmix_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmix_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmix_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: cmix a0, a2, a0, a4 +; RV32IB-NEXT: cmix a1, a3, a1, a5 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmix_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: cmix a0, a2, a0, a4 +; RV32IBT-NEXT: cmix a1, a3, a1, a5 +; RV32IBT-NEXT: ret + %and = and i64 %b, %a + %neg = xor i64 %b, -1 + %and1 = and i64 %neg, %c + %or = or i64 %and1, %and + ret i64 %or +} + +define i32 @cmov_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: cmov_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: beqz a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmov_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: cmov a0, a1, a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmov_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: cmov a0, a1, a0, a2 +; RV32IBT-NEXT: ret + %tobool.not = icmp eq i32 %b, 0 + %cond = select i1 %tobool.not, i32 %c, i32 %a + ret i32 %cond +} + +define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmov_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: beqz a2, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: mv a5, a1 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmov_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: or a2, a2, a3 +; RV32IB-NEXT: cmov a0, a2, a0, a4 +; RV32IB-NEXT: cmov a1, a2, a1, a5 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmov_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: or a2, a2, a3 +; RV32IBT-NEXT: cmov a0, a2, a0, a4 +; RV32IBT-NEXT: cmov a1, a2, a1, a5 +; 
RV32IBT-NEXT: ret + %tobool.not = icmp eq i64 %b, 0 + %cond = select i1 %tobool.not, i64 %c, i64 %a + ret i64 %cond +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define i32 @fshl_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: fshl_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 31 +; RV32I-NEXT: beqz a3, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: addi a2, zero, 32 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshl_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: fsl a0, a0, a2, a1 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshl_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: fsl a0, a0, a2, a1 +; RV32IBT-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet an efficient pattern-matching with bit manipulation +; instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions that can match more efficiently this pattern. 
+ +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: fshl_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi t1, a4, 63 +; RV32I-NEXT: addi a6, t1, -32 +; RV32I-NEXT: addi a7, zero, 31 +; RV32I-NEXT: bltz a6, .LBB5_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll t0, a0, a6 +; RV32I-NEXT: j .LBB5_3 +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: sll t0, a1, a4 +; RV32I-NEXT: sub t2, a7, t1 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a5, a5, t2 +; RV32I-NEXT: or t0, t0, a5 +; RV32I-NEXT: .LBB5_3: +; RV32I-NEXT: addi a5, zero, 32 +; RV32I-NEXT: sub t4, a5, t1 +; RV32I-NEXT: addi a5, zero, 64 +; RV32I-NEXT: sub t2, a5, t1 +; RV32I-NEXT: bltz t4, .LBB5_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t3, zero +; RV32I-NEXT: bnez t1, .LBB5_6 +; RV32I-NEXT: j .LBB5_7 +; RV32I-NEXT: .LBB5_5: +; RV32I-NEXT: srl t3, a3, t2 +; RV32I-NEXT: beqz t1, .LBB5_7 +; RV32I-NEXT: .LBB5_6: +; RV32I-NEXT: or a1, t0, t3 +; RV32I-NEXT: .LBB5_7: +; RV32I-NEXT: bltz t4, .LBB5_10 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: srl a2, a3, t4 +; RV32I-NEXT: bgez a6, .LBB5_11 +; RV32I-NEXT: .LBB5_9: +; RV32I-NEXT: sll a3, a0, a4 +; RV32I-NEXT: bnez t1, .LBB5_12 +; RV32I-NEXT: j .LBB5_13 +; RV32I-NEXT: .LBB5_10: +; RV32I-NEXT: srl a2, a2, t2 +; RV32I-NEXT: sub a5, a7, t2 +; RV32I-NEXT: slli a3, a3, 1 +; RV32I-NEXT: sll a3, a3, a5 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: bltz a6, .LBB5_9 +; RV32I-NEXT: .LBB5_11: +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: beqz t1, .LBB5_13 +; RV32I-NEXT: .LBB5_12: +; RV32I-NEXT: or a0, a3, a2 +; RV32I-NEXT: .LBB5_13: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshl_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi t1, a4, 63 +; RV32IB-NEXT: addi a6, t1, -32 +; RV32IB-NEXT: addi a7, zero, 31 +; RV32IB-NEXT: bltz a6, .LBB5_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sll t0, a0, a6 +; RV32IB-NEXT: j .LBB5_3 +; RV32IB-NEXT: .LBB5_2: +; RV32IB-NEXT: sll t0, a1, a4 +; RV32IB-NEXT: sub t2, a7, t1 +; RV32IB-NEXT: srli a5, a0, 1 +; 
RV32IB-NEXT: srl a5, a5, t2 +; RV32IB-NEXT: or t0, t0, a5 +; RV32IB-NEXT: .LBB5_3: +; RV32IB-NEXT: addi a5, zero, 32 +; RV32IB-NEXT: sub t4, a5, t1 +; RV32IB-NEXT: addi a5, zero, 64 +; RV32IB-NEXT: sub t2, a5, t1 +; RV32IB-NEXT: bltz t4, .LBB5_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t3, zero +; RV32IB-NEXT: or t0, t0, t3 +; RV32IB-NEXT: bgez t4, .LBB5_8 +; RV32IB-NEXT: .LBB5_5: +; RV32IB-NEXT: srl a2, a2, t2 +; RV32IB-NEXT: sub a5, a7, t2 +; RV32IB-NEXT: slli a3, a3, 1 +; RV32IB-NEXT: sll a3, a3, a5 +; RV32IB-NEXT: or a2, a2, a3 +; RV32IB-NEXT: cmov a1, t1, t0, a1 +; RV32IB-NEXT: bgez a6, .LBB5_9 +; RV32IB-NEXT: .LBB5_6: +; RV32IB-NEXT: sll a3, a0, a4 +; RV32IB-NEXT: j .LBB5_10 +; RV32IB-NEXT: .LBB5_7: +; RV32IB-NEXT: srl t3, a3, t2 +; RV32IB-NEXT: or t0, t0, t3 +; RV32IB-NEXT: bltz t4, .LBB5_5 +; RV32IB-NEXT: .LBB5_8: +; RV32IB-NEXT: srl a2, a3, t4 +; RV32IB-NEXT: cmov a1, t1, t0, a1 +; RV32IB-NEXT: bltz a6, .LBB5_6 +; RV32IB-NEXT: .LBB5_9: +; RV32IB-NEXT: mv a3, zero +; RV32IB-NEXT: .LBB5_10: +; RV32IB-NEXT: or a2, a3, a2 +; RV32IB-NEXT: cmov a0, t1, a2, a0 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshl_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: andi t1, a4, 63 +; RV32IBT-NEXT: addi a6, t1, -32 +; RV32IBT-NEXT: addi a7, zero, 31 +; RV32IBT-NEXT: bltz a6, .LBB5_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: sll t0, a0, a6 +; RV32IBT-NEXT: j .LBB5_3 +; RV32IBT-NEXT: .LBB5_2: +; RV32IBT-NEXT: sll t0, a1, a4 +; RV32IBT-NEXT: sub t2, a7, t1 +; RV32IBT-NEXT: srli a5, a0, 1 +; RV32IBT-NEXT: srl a5, a5, t2 +; RV32IBT-NEXT: or t0, t0, a5 +; RV32IBT-NEXT: .LBB5_3: +; RV32IBT-NEXT: addi a5, zero, 32 +; RV32IBT-NEXT: sub t4, a5, t1 +; RV32IBT-NEXT: addi a5, zero, 64 +; RV32IBT-NEXT: sub t2, a5, t1 +; RV32IBT-NEXT: bltz t4, .LBB5_7 +; RV32IBT-NEXT: # %bb.4: +; RV32IBT-NEXT: mv t3, zero +; RV32IBT-NEXT: or t0, t0, t3 +; RV32IBT-NEXT: bgez t4, .LBB5_8 +; RV32IBT-NEXT: .LBB5_5: +; RV32IBT-NEXT: srl a2, a2, t2 +; RV32IBT-NEXT: sub a5, a7, t2 +; RV32IBT-NEXT: slli a3, a3, 1 +; 
RV32IBT-NEXT: sll a3, a3, a5 +; RV32IBT-NEXT: or a2, a2, a3 +; RV32IBT-NEXT: cmov a1, t1, t0, a1 +; RV32IBT-NEXT: bgez a6, .LBB5_9 +; RV32IBT-NEXT: .LBB5_6: +; RV32IBT-NEXT: sll a3, a0, a4 +; RV32IBT-NEXT: j .LBB5_10 +; RV32IBT-NEXT: .LBB5_7: +; RV32IBT-NEXT: srl t3, a3, t2 +; RV32IBT-NEXT: or t0, t0, t3 +; RV32IBT-NEXT: bltz t4, .LBB5_5 +; RV32IBT-NEXT: .LBB5_8: +; RV32IBT-NEXT: srl a2, a3, t4 +; RV32IBT-NEXT: cmov a1, t1, t0, a1 +; RV32IBT-NEXT: bltz a6, .LBB5_6 +; RV32IBT-NEXT: .LBB5_9: +; RV32IBT-NEXT: mv a3, zero +; RV32IBT-NEXT: .LBB5_10: +; RV32IBT-NEXT: or a2, a3, a2 +; RV32IBT-NEXT: cmov a0, t1, a2, a0 +; RV32IBT-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define i32 @fshr_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: fshr_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 31 +; RV32I-NEXT: beqz a3, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: addi a2, zero, 32 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshr_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: fsr a0, a0, a2, a1 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshr_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: fsr a0, a0, a2, a1 +; RV32IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet an efficient pattern-matching with bit manipulation +; instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions that can match more efficiently this pattern. 
+ +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: fshr_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv t1, a3 +; RV32I-NEXT: mv a6, a2 +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: addi t2, a5, -32 +; RV32I-NEXT: addi a7, zero, 31 +; RV32I-NEXT: bltz t2, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl t0, t1, t2 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: srl t0, a6, a4 +; RV32I-NEXT: sub a3, a7, a5 +; RV32I-NEXT: slli a2, t1, 1 +; RV32I-NEXT: sll a2, a2, a3 +; RV32I-NEXT: or t0, t0, a2 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: addi a2, zero, 32 +; RV32I-NEXT: sub a3, a2, a5 +; RV32I-NEXT: addi a2, zero, 64 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: bltz a3, .LBB7_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t3, zero +; RV32I-NEXT: bnez a5, .LBB7_6 +; RV32I-NEXT: j .LBB7_7 +; RV32I-NEXT: .LBB7_5: +; RV32I-NEXT: sll t3, a0, a2 +; RV32I-NEXT: beqz a5, .LBB7_7 +; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: or a6, t3, t0 +; RV32I-NEXT: .LBB7_7: +; RV32I-NEXT: bltz a3, .LBB7_10 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: sll a0, a0, a3 +; RV32I-NEXT: bgez t2, .LBB7_11 +; RV32I-NEXT: .LBB7_9: +; RV32I-NEXT: srl a1, t1, a4 +; RV32I-NEXT: bnez a5, .LBB7_12 +; RV32I-NEXT: j .LBB7_13 +; RV32I-NEXT: .LBB7_10: +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: sub a2, a7, a2 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: bltz t2, .LBB7_9 +; RV32I-NEXT: .LBB7_11: +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: beqz a5, .LBB7_13 +; RV32I-NEXT: .LBB7_12: +; RV32I-NEXT: or t1, a0, a1 +; RV32I-NEXT: .LBB7_13: +; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshr_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi t1, a4, 63 +; RV32IB-NEXT: addi a6, t1, -32 +; RV32IB-NEXT: addi a7, zero, 31 +; RV32IB-NEXT: bltz a6, .LBB7_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: srl t0, a3, a6 +; RV32IB-NEXT: j .LBB7_3 +; RV32IB-NEXT: .LBB7_2: +; 
RV32IB-NEXT: srl t0, a2, a4 +; RV32IB-NEXT: sub t2, a7, t1 +; RV32IB-NEXT: slli a5, a3, 1 +; RV32IB-NEXT: sll a5, a5, t2 +; RV32IB-NEXT: or t0, t0, a5 +; RV32IB-NEXT: .LBB7_3: +; RV32IB-NEXT: addi a5, zero, 32 +; RV32IB-NEXT: sub t4, a5, t1 +; RV32IB-NEXT: addi a5, zero, 64 +; RV32IB-NEXT: sub t2, a5, t1 +; RV32IB-NEXT: bltz t4, .LBB7_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t3, zero +; RV32IB-NEXT: or t0, t3, t0 +; RV32IB-NEXT: bgez t4, .LBB7_8 +; RV32IB-NEXT: .LBB7_5: +; RV32IB-NEXT: sll a1, a1, t2 +; RV32IB-NEXT: sub a5, a7, t2 +; RV32IB-NEXT: srli a0, a0, 1 +; RV32IB-NEXT: srl a0, a0, a5 +; RV32IB-NEXT: or a1, a1, a0 +; RV32IB-NEXT: cmov a0, t1, t0, a2 +; RV32IB-NEXT: bgez a6, .LBB7_9 +; RV32IB-NEXT: .LBB7_6: +; RV32IB-NEXT: srl a2, a3, a4 +; RV32IB-NEXT: j .LBB7_10 +; RV32IB-NEXT: .LBB7_7: +; RV32IB-NEXT: sll t3, a0, t2 +; RV32IB-NEXT: or t0, t3, t0 +; RV32IB-NEXT: bltz t4, .LBB7_5 +; RV32IB-NEXT: .LBB7_8: +; RV32IB-NEXT: sll a1, a0, t4 +; RV32IB-NEXT: cmov a0, t1, t0, a2 +; RV32IB-NEXT: bltz a6, .LBB7_6 +; RV32IB-NEXT: .LBB7_9: +; RV32IB-NEXT: mv a2, zero +; RV32IB-NEXT: .LBB7_10: +; RV32IB-NEXT: or a1, a1, a2 +; RV32IB-NEXT: cmov a1, t1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshr_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: andi t1, a4, 63 +; RV32IBT-NEXT: addi a6, t1, -32 +; RV32IBT-NEXT: addi a7, zero, 31 +; RV32IBT-NEXT: bltz a6, .LBB7_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: srl t0, a3, a6 +; RV32IBT-NEXT: j .LBB7_3 +; RV32IBT-NEXT: .LBB7_2: +; RV32IBT-NEXT: srl t0, a2, a4 +; RV32IBT-NEXT: sub t2, a7, t1 +; RV32IBT-NEXT: slli a5, a3, 1 +; RV32IBT-NEXT: sll a5, a5, t2 +; RV32IBT-NEXT: or t0, t0, a5 +; RV32IBT-NEXT: .LBB7_3: +; RV32IBT-NEXT: addi a5, zero, 32 +; RV32IBT-NEXT: sub t4, a5, t1 +; RV32IBT-NEXT: addi a5, zero, 64 +; RV32IBT-NEXT: sub t2, a5, t1 +; RV32IBT-NEXT: bltz t4, .LBB7_7 +; RV32IBT-NEXT: # %bb.4: +; RV32IBT-NEXT: mv t3, zero +; RV32IBT-NEXT: or t0, t3, t0 +; RV32IBT-NEXT: bgez t4, .LBB7_8 +; RV32IBT-NEXT: .LBB7_5: +; 
RV32IBT-NEXT: sll a1, a1, t2 +; RV32IBT-NEXT: sub a5, a7, t2 +; RV32IBT-NEXT: srli a0, a0, 1 +; RV32IBT-NEXT: srl a0, a0, a5 +; RV32IBT-NEXT: or a1, a1, a0 +; RV32IBT-NEXT: cmov a0, t1, t0, a2 +; RV32IBT-NEXT: bgez a6, .LBB7_9 +; RV32IBT-NEXT: .LBB7_6: +; RV32IBT-NEXT: srl a2, a3, a4 +; RV32IBT-NEXT: j .LBB7_10 +; RV32IBT-NEXT: .LBB7_7: +; RV32IBT-NEXT: sll t3, a0, t2 +; RV32IBT-NEXT: or t0, t3, t0 +; RV32IBT-NEXT: bltz t4, .LBB7_5 +; RV32IBT-NEXT: .LBB7_8: +; RV32IBT-NEXT: sll a1, a0, t4 +; RV32IBT-NEXT: cmov a0, t1, t0, a2 +; RV32IBT-NEXT: bltz a6, .LBB7_6 +; RV32IBT-NEXT: .LBB7_9: +; RV32IBT-NEXT: mv a2, zero +; RV32IBT-NEXT: .LBB7_10: +; RV32IBT-NEXT: or a1, a1, a2 +; RV32IBT-NEXT: cmov a1, t1, a1, a3 +; RV32IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +define i32 @fshri_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: fshri_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a1, 5 +; RV32I-NEXT: slli a0, a0, 27 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshri_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: fsri a0, a0, a1, 5 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshri_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: fsri a0, a0, a1, 5 +; RV32IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5) + ret i32 %1 +} + +define i64 @fshri_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: fshri_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a3, 27 +; RV32I-NEXT: srli a2, a2, 5 +; RV32I-NEXT: or a2, a2, a1 +; RV32I-NEXT: srli a1, a3, 5 +; RV32I-NEXT: slli a0, a0, 27 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshri_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a1, zero, 27 +; RV32IB-NEXT: fsl a2, a3, a1, a2 +; RV32IB-NEXT: fsl a1, a0, a1, a3 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshri_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: addi a1, zero, 27 +; RV32IBT-NEXT: fsl a2, a3, a1, a2 +; RV32IBT-NEXT: fsl a1, a0, a1, a3 +; 
RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 5) + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbb.ll b/llvm/test/CodeGen/RISCV/rv64Zbb.ll new file mode 100644 index 0000000000000..2e4b69e4997b3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbb.ll @@ -0,0 +1,1149 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBB + +define signext i32 @slo_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: slo_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: slo_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: slow a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: slo_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: slow a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shl = shl i32 %neg, %b + %neg1 = xor i32 %shl, -1 + ret i32 %neg1 +} + +define i64 @slo_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: slo_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: slo_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: slo a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: slo_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: slo a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shl = shl i64 %neg, %b + %neg1 = xor i64 %shl, -1 + ret i64 %neg1 +} + +define signext i32 @sro_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: sro_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; 
RV64I-NEXT: ret +; +; RV64IB-LABEL: sro_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: srow a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sro_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: srow a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shr = lshr i32 %neg, %b + %neg1 = xor i32 %shr, -1 + ret i32 %neg1 +} + +define i64 @sro_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: sro_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sro_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sro a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sro_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sro a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shr = lshr i64 %neg, %b + %neg1 = xor i64 %shr, -1 + ret i64 %neg1 +} + +define signext i32 @sloi_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sloi_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: ori a0, a0, 1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sloi_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sloiw a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sloi_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sloiw a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = shl i32 %a, 1 + %neg12 = or i32 %neg, 1 + ret i32 %neg12 +} + +define i64 @sloi_i64(i64 %a) nounwind { +; RV64I-LABEL: sloi_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: ori a0, a0, 1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sloi_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sloi a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sloi_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sloi a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = shl i64 %a, 1 + %neg12 = or i64 %neg, 1 + ret i64 %neg12 +} + +define signext i32 @sroi_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sroi_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sroi_i32: +; RV64IB: # 
%bb.0: +; RV64IB-NEXT: sroiw a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sroi_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sroiw a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = lshr i32 %a, 1 + %neg12 = or i32 %neg, -2147483648 + ret i32 %neg12 +} + +define i64 @sroi_i64(i64 %a) nounwind { +; RV64I-LABEL: sroi_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: addi a1, zero, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sroi_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sroi a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sroi_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sroi a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = lshr i64 %a, 1 + %neg12 = or i64 %neg, -9223372036854775808 + ret i64 %neg12 +} + +declare i32 @llvm.ctlz.i32(i32, i1) + +define signext i32 @ctlz_i32(i32 signext %a) nounwind { +; RV64I-LABEL: ctlz_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB8_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; 
RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: addi a0, a0, -32 +; RV64I-NEXT: j .LBB8_3 +; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: addi a0, zero, 32 +; RV64I-NEXT: .LBB8_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctlz_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB8_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: clzw a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB8_2: +; RV64IB-NEXT: addi a0, zero, 32 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctlz_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB8_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: clzw a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB8_2: +; RV64IBB-NEXT: addi a0, zero, 32 +; RV64IBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.ctlz.i64(i64, i1) + +define i64 @ctlz_i64(i64 %a) nounwind { +; RV64I-LABEL: ctlz_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB9_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, 
a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: j .LBB9_3 +; RV64I-NEXT: .LBB9_2: +; RV64I-NEXT: addi a0, zero, 64 +; RV64I-NEXT: .LBB9_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctlz_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB9_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: clz a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB9_2: +; RV64IB-NEXT: addi a0, zero, 64 +; RV64IB-NEXT: ret +; +; 
RV64IBB-LABEL: ctlz_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB9_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: clz a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB9_2: +; RV64IBB-NEXT: addi a0, zero, 64 +; RV64IBB-NEXT: ret + %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.cttz.i32(i32, i1) + +define signext i32 @cttz_i32(i32 signext %a) nounwind { +; RV64I-LABEL: cttz_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB10_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: j 
.LBB10_3 +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: addi a0, zero, 32 +; RV64I-NEXT: .LBB10_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cttz_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB10_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: ctz a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB10_2: +; RV64IB-NEXT: addi a0, zero, 32 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: cttz_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB10_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: ctz a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB10_2: +; RV64IBB-NEXT: addi a0, zero, 32 +; RV64IBB-NEXT: ret + %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.cttz.i64(i64, i1) + +define i64 @cttz_i64(i64 %a) nounwind { +; RV64I-LABEL: cttz_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB11_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 
-241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: j .LBB11_3 +; RV64I-NEXT: .LBB11_2: +; RV64I-NEXT: addi a0, zero, 64 +; RV64I-NEXT: .LBB11_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cttz_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB11_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: ctz a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB11_2: +; RV64IB-NEXT: addi a0, zero, 64 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: cttz_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB11_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: ctz a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB11_2: +; RV64IBB-NEXT: addi a0, zero, 64 +; RV64IBB-NEXT: ret + %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.ctpop.i32(i32) + +define signext i32 @ctpop_i32(i32 signext %a) nounwind { +; RV64I-LABEL: ctpop_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srliw a0, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 13107 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 
4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctpop_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: pcntw a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctpop_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: pcntw a0, a0 +; RV64IBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.ctpop.i64(i64) + +define i64 @ctpop_i64(i64 %a) nounwind { +; RV64I-LABEL: ctpop_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; 
RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctpop_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: pcnt a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctpop_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: pcnt a0, a0 +; RV64IBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %1 +} + +define signext i32 @sextb_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sextb_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sextb_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sext.b a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sextb_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sext.b a0, a0 +; RV64IBB-NEXT: ret + %shl = shl i32 %a, 24 + %shr = ashr exact i32 %shl, 24 + ret i32 %shr +} + +define i64 @sextb_i64(i64 %a) nounwind { +; RV64I-LABEL: sextb_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sextb_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sext.b a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sextb_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sext.b a0, a0 +; RV64IBB-NEXT: ret + %shl = shl i64 %a, 56 + %shr = ashr exact i64 %shl, 56 + ret i64 %shr +} + +define signext i32 @sexth_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sexth_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sexth_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sext.h a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sexth_i32: +; RV64IBB: # %bb.0: 
+; RV64IBB-NEXT: sext.h a0, a0 +; RV64IBB-NEXT: ret + %shl = shl i32 %a, 16 + %shr = ashr exact i32 %shl, 16 + ret i32 %shr +} + +define i64 @sexth_i64(i64 %a) nounwind { +; RV64I-LABEL: sexth_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sexth_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sext.h a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sexth_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sext.h a0, a0 +; RV64IBB-NEXT: ret + %shl = shl i64 %a, 48 + %shr = ashr exact i64 %shl, 48 + ret i64 %shr +} + +define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: min_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: blt a0, a1, .LBB18_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB18_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: min_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: min a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: min_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: min a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp slt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @min_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: min_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: blt a0, a1, .LBB19_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB19_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: min_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: min a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: min_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: min a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp slt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: max_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: blt a1, a0, .LBB20_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB20_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: max_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: max a0, a0, a1 +; RV64IB-NEXT: ret +; +; 
RV64IBB-LABEL: max_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: max a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp sgt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @max_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: max_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: blt a1, a0, .LBB21_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB21_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: max_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: max a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: max_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: max a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp sgt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: minu_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: bltu a0, a1, .LBB22_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB22_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: minu_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: minu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: minu_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: minu a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp ult i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @minu_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: minu_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: bltu a0, a1, .LBB23_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB23_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: minu_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: minu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: minu_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: minu a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp ult i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: maxu_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: bltu a1, a0, .LBB24_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: 
.LBB24_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: maxu_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: maxu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: maxu_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: maxu a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp ugt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @maxu_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: maxu_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: bltu a1, a0, .LBB25_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB25_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: maxu_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: maxu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: maxu_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: maxu a0, a0, a1 +; RV64IBB-NEXT: ret + %cmp = icmp ugt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +; We select a i32 addi that zero-extends the result on RV64 as addiwu + +define zeroext i32 @zext_add_to_addiwu(i32 signext %a) nounwind { +; RV64I-LABEL: zext_add_to_addiwu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: zext_add_to_addiwu: +; RV64IB: # %bb.0: +; RV64IB-NEXT: addiwu a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: zext_add_to_addiwu: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: addiwu a0, a0, 1 +; RV64IBB-NEXT: ret + %add = add i32 %a, 1 + ret i32 %add +} + +define i64 @addiwu(i64 %a) nounwind { +; RV64I-LABEL: addiwu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: addiwu: +; RV64IB: # %bb.0: +; RV64IB-NEXT: addiwu a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: addiwu: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: addiwu a0, a0, 1 +; RV64IBB-NEXT: ret + %conv = add i64 %a, 1 + %conv1 = and i64 %conv, 4294967295 + ret i64 %conv1 +} + +define i64 @slliuw(i64 %a) nounwind { +; RV64I-LABEL: slliuw: +; RV64I: # %bb.0: +; 
RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: addi a1, zero, 1 +; RV64I-NEXT: slli a1, a1, 33 +; RV64I-NEXT: addi a1, a1, -2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: slliuw: +; RV64IB: # %bb.0: +; RV64IB-NEXT: slliu.w a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: slliuw: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: slliu.w a0, a0, 1 +; RV64IBB-NEXT: ret + %conv1 = shl i64 %a, 1 + %shl = and i64 %conv1, 8589934590 + ret i64 %shl +} + +; We select a i32 add that zero-extends the result on RV64 as addwu + +define zeroext i32 @zext_add_to_addwu(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: zext_add_to_addwu: +; RV64I: # %bb.0: +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: zext_add_to_addwu: +; RV64IB: # %bb.0: +; RV64IB-NEXT: addwu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: zext_add_to_addwu: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: addwu a0, a0, a1 +; RV64IBB-NEXT: ret + %add = add i32 %a, %b + ret i32 %add +} + +define i64 @addwu(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: addwu: +; RV64I: # %bb.0: +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: addwu: +; RV64IB: # %bb.0: +; RV64IB-NEXT: addwu a0, a1, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: addwu: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: addwu a0, a1, a0 +; RV64IBB-NEXT: ret + %add = add i64 %b, %a + %conv1 = and i64 %add, 4294967295 + ret i64 %conv1 +} + +; We select a i32 sub that zero-extends the result on RV64 as subwu + +define zeroext i32 @zext_sub_to_subwu(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: zext_sub_to_subwu: +; RV64I: # %bb.0: +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: zext_sub_to_subwu: +; RV64IB: # %bb.0: +; RV64IB-NEXT: subwu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: 
zext_sub_to_subwu: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: subwu a0, a0, a1 +; RV64IBB-NEXT: ret + %sub = sub i32 %a, %b + ret i32 %sub +} + +define i64 @subwu(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: subwu: +; RV64I: # %bb.0: +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: subwu: +; RV64IB: # %bb.0: +; RV64IB-NEXT: subwu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: subwu: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: subwu a0, a0, a1 +; RV64IBB-NEXT: ret + %sub = sub i64 %a, %b + %conv1 = and i64 %sub, 4294967295 + ret i64 %conv1 +} + +define i64 @adduw(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: adduw: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: adduw: +; RV64IB: # %bb.0: +; RV64IB-NEXT: addu.w a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: adduw: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: addu.w a0, a0, a1 +; RV64IBB-NEXT: ret + %and = and i64 %b, 4294967295 + %add = add i64 %and, %a + ret i64 %add +} + +define i64 @subuw(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: subuw: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: subuw: +; RV64IB: # %bb.0: +; RV64IB-NEXT: subu.w a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: subuw: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: subu.w a0, a0, a1 +; RV64IBB-NEXT: ret + %and = and i64 %b, 4294967295 + %sub = sub i64 %a, %and + ret i64 %sub +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll new file mode 100644 index 0000000000000..c3a6799739d2b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll @@ -0,0 +1,517 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBP + +define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: andn_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: andn_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: andn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: andn_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: andn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: andn_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: andn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 %and +} + +define i64 @andn_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: andn_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: andn_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: andn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: andn_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: andn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: andn_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: andn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i64 %b, -1 + %and = and i64 %neg, %a + ret i64 %and +} + +define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: orn_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: orn_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: orn_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: orn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: orn_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orn a0, a0, a1 +; RV64IBP-NEXT: ret 
+ %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i64 @orn_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: orn_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: orn_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: orn_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: orn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: orn_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i64 %b, -1 + %or = or i64 %neg, %a + ret i64 %or +} + +define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: xnor_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: xnor_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xnor a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: xnor_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: xnor a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: xnor_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xnor a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i32 %a, -1 + %xor = xor i32 %neg, %b + ret i32 %xor +} + +define i64 @xnor_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: xnor_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: xnor_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xnor a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: xnor_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: xnor a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: xnor_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xnor a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i64 %a, -1 + %xor = xor i64 %neg, %b + ret i64 %xor +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: rol_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: sllw a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: srlw a0, a0, a1 +; 
RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rol_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rolw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rol_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rolw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rol_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rolw a0, a0, a1 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b) + ret i32 %1 +} + +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @rol_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: rol_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: sll a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rol_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rol a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rol_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rol a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rol_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rol a0, a0, a1 +; RV64IBP-NEXT: ret + %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: ror_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srlw a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ror_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rorw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ror_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rorw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: ror_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rorw a0, a0, a1 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b) + ret i32 %1 +} + +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @ror_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: ror_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srl a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: sll a0, a0, 
a1 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ror_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: ror a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ror_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: ror a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: ror_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: ror a0, a0, a1 +; RV64IBP-NEXT: ret + %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +define signext i32 @rori_i32(i32 signext %a) nounwind { +; RV64I-LABEL: rori_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rori_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsriw a0, a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rori_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: roriw a0, a0, 1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rori_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: roriw a0, a0, 1 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31) + ret i32 %1 +} + +define i64 @rori_i64(i64 %a) nounwind { +; RV64I-LABEL: rori_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 63 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rori_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rori a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rori_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rori a0, a0, 1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rori_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rori a0, a0, 1 +; RV64IBP-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63) + ret i64 %1 +} + +define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: pack_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: 
pack_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: pack_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: pack_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packw a0, a0, a1 +; RV64IBP-NEXT: ret + %shl = and i32 %a, 65535 + %shl1 = shl i32 %b, 16 + %or = or i32 %shl1, %shl + ret i32 %or +} + +define i64 @pack_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: pack_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: pack_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: pack a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: pack_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: pack a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: pack_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: pack a0, a0, a1 +; RV64IBP-NEXT: ret + %shl = and i64 %a, 4294967295 + %shl1 = shl i64 %b, 32 + %or = or i64 %shl1, %shl + ret i64 %or +} + +define signext i32 @packu_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: packu_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a0, a0, 16 +; RV64I-NEXT: lui a2, 1048560 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packu_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packuw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packu_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packuw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packu_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packuw a0, a0, a1 +; RV64IBP-NEXT: ret + %shr = lshr i32 %a, 16 + %shr1 = and i32 %b, -65536 + %or = or i32 %shr1, %shr + ret i32 %or +} + +define i64 @packu_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: packu_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret 
+; +; RV64IB-LABEL: packu_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packu_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packu a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packu_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packu a0, a0, a1 +; RV64IBP-NEXT: ret + %shr = lshr i64 %a, 32 + %shr1 = and i64 %b, -4294967296 + %or = or i64 %shr1, %shr + ret i64 %or +} + +define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: packh_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packh_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packh a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packh_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packh a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packh_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packh a0, a0, a1 +; RV64IBP-NEXT: ret + %and = and i32 %a, 255 + %and1 = shl i32 %b, 8 + %shl = and i32 %and1, 65280 + %or = or i32 %shl, %and + ret i32 %or +} + +define i64 @packh_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: packh_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packh_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packh a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packh_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packh a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packh_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packh a0, a0, a1 +; RV64IBP-NEXT: ret + %and = and i64 %a, 255 + %and1 = shl i64 %b, 8 + %shl = and i64 %and1, 65280 + %or = or i64 %shl, %and + ret i64 %or +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll 
b/llvm/test/CodeGen/RISCV/rv64Zbp.ll new file mode 100644 index 0000000000000..ae467efaab832 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll @@ -0,0 +1,1343 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBP + +define signext i32 @gorc1_i32(i32 signext %a) nounwind { +; RV64I-LABEL: gorc1_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: lui a2, 171 +; RV64I-NEXT: addiw a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc1_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorciw a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc1_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorciw a0, a0, 1 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 1 + %shl = and i32 %and, -1431655766 + %and1 = lshr i32 %a, 1 + %shr = and i32 %and1, 1431655765 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc1_i64(i64 %a) nounwind { +; RV64I-LABEL: gorc1_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: lui a2, 1026731 +; RV64I-NEXT: addiw a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: lui a3, 21845 +; 
RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 1365 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 1365 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc1_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc.p a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc1_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orc.p a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 1 + %shl = and i64 %and, -6148914691236517206 + %and1 = lshr i64 %a, 1 + %shr = and i64 %and1, 6148914691236517205 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define signext i32 @gorc2_i32(i32 signext %a) nounwind { +; RV64I-LABEL: gorc2_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: lui a2, 205 +; RV64I-NEXT: addiw a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: lui a3, 209715 +; RV64I-NEXT: addiw a3, a3, 819 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc2_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorciw a0, a0, 2 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc2_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorciw a0, a0, 2 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 2 + %shl = and i32 %and, -858993460 + %and1 = lshr i32 %a, 2 + %shr = and i32 %and1, 858993459 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc2_i64(i64 %a) nounwind { +; RV64I-LABEL: gorc2_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: lui a2, 1035469 +; RV64I-NEXT: addiw a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 
+; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: lui a3, 13107 +; RV64I-NEXT: addiw a3, a3, 819 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 819 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 819 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 819 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc2_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc2.n a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc2_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orc2.n a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 2 + %shl = and i64 %and, -3689348814741910324 + %and1 = lshr i64 %a, 2 + %shr = and i64 %and1, 3689348814741910323 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define signext i32 @gorc4_i32(i32 signext %a) nounwind { +; RV64I-LABEL: gorc4_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: lui a2, 241 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: lui a3, 61681 +; RV64I-NEXT: addiw a3, a3, -241 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc4_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorciw a0, a0, 4 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc4_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorciw a0, a0, 4 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 4 + %shl = and i32 %and, -252645136 + %and1 = lshr i32 %a, 4 + %shr = and i32 %and1, 252645135 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc4_i64(i64 %a) nounwind { +; RV64I-LABEL: gorc4_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: lui a2, 1044721 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; 
RV64I-NEXT: addi a2, a2, 241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: lui a3, 3855 +; RV64I-NEXT: addiw a3, a3, 241 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, -241 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 241 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, -241 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc4_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc4.b a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc4_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orc4.b a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 4 + %shl = and i64 %and, -1085102592571150096 + %and1 = lshr i64 %a, 4 + %shr = and i64 %and1, 1085102592571150095 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define signext i32 @gorc8_i32(i32 signext %a) nounwind { +; RV64I-LABEL: gorc8_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: addiw a3, a3, 255 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc8_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorciw a0, a0, 8 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc8_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorciw a0, a0, 8 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 8 + %shl = and i32 %and, -16711936 + %and1 = lshr i32 %a, 8 + %shr = and i32 %and1, 16711935 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc8_i64(i64 %a) nounwind { +; RV64I-LABEL: gorc8_i64: +; RV64I: # %bb.0: +; 
RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: lui a2, 1044496 +; RV64I-NEXT: addiw a2, a2, -255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: addiw a3, a3, 255 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: addi a3, a3, 255 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: addi a3, a3, 255 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc8_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc8.h a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc8_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orc8.h a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 8 + %shl = and i64 %and, -71777214294589696 + %and1 = lshr i64 %a, 8 + %shr = and i64 %and1, 71777214294589695 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define signext i32 @gorc16_i32(i32 signext %a) nounwind { +; RV64I-LABEL: gorc16_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: srliw a2, a0, 16 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc16_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorciw a0, a0, 16 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc16_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorciw a0, a0, 16 +; RV64IBP-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i64 @gorc16_i64(i64 %a) nounwind { +; RV64I-LABEL: gorc16_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: lui a2, 1048560 +; RV64I-NEXT: addiw a2, a2, 1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 16 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: addiw a3, a3, -1 +; 
RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: addi a3, a3, 1 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc16_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc16.w a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc16_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orc16.w a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 16 + %shl = and i64 %and, -281470681808896 + %and1 = lshr i64 %a, 16 + %shr = and i64 %and1, 281470681808895 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @gorc32(i64 %a) nounwind { +; RV64I-LABEL: gorc32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: srli a2, a0, 32 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: gorc32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc32 a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orc32 a0, a0 +; RV64IBP-NEXT: ret + %shl = shl i64 %a, 32 + %shr = lshr i64 %a, 32 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define signext i32 @grev1_i32(i32 signext %a) nounwind { +; RV64I-LABEL: grev1_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: lui a2, 171 +; RV64I-NEXT: addiw a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev1_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev1_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 1 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 1 + %shl = and i32 %and, -1431655766 + %and1 = lshr i32 %a, 1 + %shr = and i32 %and1, 1431655765 + 
%or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev1_i64(i64 %a) nounwind { +; RV64I-LABEL: grev1_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: lui a2, 1026731 +; RV64I-NEXT: addiw a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev1_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev.p a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev1_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev.p a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 1 + %shl = and i64 %and, -6148914691236517206 + %and1 = lshr i64 %a, 1 + %shr = and i64 %and1, 6148914691236517205 + %or = or i64 %shl, %shr + ret i64 %or +} + +define signext i32 @grev2_i32(i32 signext %a) nounwind { +; RV64I-LABEL: grev2_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: lui a2, 205 +; RV64I-NEXT: addiw a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev2_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 2 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev2_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 2 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 2 + %shl = and i32 %and, -858993460 + %and1 = lshr i32 %a, 2 + %shr = and i32 
%and1, 858993459 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev2_i64(i64 %a) nounwind { +; RV64I-LABEL: grev2_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: lui a2, 1035469 +; RV64I-NEXT: addiw a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: lui a2, 13107 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev2_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev2.n a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev2_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev2.n a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 2 + %shl = and i64 %and, -3689348814741910324 + %and1 = lshr i64 %a, 2 + %shr = and i64 %and1, 3689348814741910323 + %or = or i64 %shl, %shr + ret i64 %or +} + +define signext i32 @grev4_i32(i32 signext %a) nounwind { +; RV64I-LABEL: grev4_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: lui a2, 241 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 4 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev4_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 4 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev4_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 4 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 4 + %shl = and i32 %and, -252645136 + %and1 = lshr i32 %a, 4 + 
%shr = and i32 %and1, 252645135 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev4_i64(i64 %a) nounwind { +; RV64I-LABEL: grev4_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: lui a2, 1044721 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 4 +; RV64I-NEXT: lui a2, 3855 +; RV64I-NEXT: addiw a2, a2, 241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev4_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev4.b a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev4_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev4.b a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 4 + %shl = and i64 %and, -1085102592571150096 + %and1 = lshr i64 %a, 4 + %shr = and i64 %and1, 1085102592571150095 + %or = or i64 %shl, %shr + ret i64 %or +} + +define signext i32 @grev8_i32(i32 signext %a) nounwind { +; RV64I-LABEL: grev8_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: addiw a2, a2, 255 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev8_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 8 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev8_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 8 +; RV64IBP-NEXT: ret + %and = shl i32 %a, 8 + %shl = and i32 %and, -16711936 + %and1 = lshr i32 
%a, 8 + %shr = and i32 %and1, 16711935 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev8_i64(i64 %a) nounwind { +; RV64I-LABEL: grev8_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: lui a2, 1044496 +; RV64I-NEXT: addiw a2, a2, -255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: addiw a2, a2, 255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, 255 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, 255 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev8_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev8.h a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev8_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev8.h a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 8 + %shl = and i64 %and, -71777214294589696 + %and1 = lshr i64 %a, 8 + %shr = and i64 %and1, 71777214294589695 + %or = or i64 %shl, %shr + ret i64 %or +} + +define signext i32 @grev16_i32(i32 signext %a) nounwind { +; RV64I-LABEL: grev16_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: srliw a0, a0, 16 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev16_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 16 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev16_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 16 +; RV64IBP-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @grev16_i64(i64 %a) nounwind { +; RV64I-LABEL: grev16_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: lui a2, 1048560 +; RV64I-NEXT: addiw a2, a2, 1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a0, a0, 16 +; 
RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, 1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev16_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev16.w a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev16_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev16.w a0, a0 +; RV64IBP-NEXT: ret + %and = shl i64 %a, 16 + %shl = and i64 %and, -281470681808896 + %and1 = lshr i64 %a, 16 + %shr = and i64 %and1, 281470681808895 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @grev32(i64 %a) nounwind { +; RV64I-LABEL: grev32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: grev32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rori a0, a0, 32 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rori a0, a0, 32 +; RV64IBP-NEXT: ret + %shl = shl i64 %a, 32 + %shr = lshr i64 %a, 32 + %or = or i64 %shl, %shr + ret i64 %or +} + +declare i32 @llvm.bswap.i32(i32) + +define signext i32 @bswap_i32(i32 signext %a) nounwind { +; RV64I-LABEL: bswap_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: addi a2, zero, 255 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 24 +; RV64I-NEXT: slli a4, a2, 40 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: slli a2, a2, 48 +; RV64I-NEXT: and a2, a3, a2 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srai a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: bswap_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 24 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: bswap_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 24 +; RV64IBP-NEXT: ret + %1 = tail call i32 
@llvm.bswap.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.bswap.i64(i64) + +define i64 @bswap_i64(i64 %a) { +; RV64I-LABEL: bswap_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: addi a3, zero, 255 +; RV64I-NEXT: slli a4, a3, 24 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a2, a0, 40 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: addiw a4, a4, -256 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: srli a4, a0, 56 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 24 +; RV64I-NEXT: slli a5, a3, 40 +; RV64I-NEXT: and a4, a4, a5 +; RV64I-NEXT: or a2, a4, a2 +; RV64I-NEXT: slli a4, a0, 40 +; RV64I-NEXT: slli a3, a3, 48 +; RV64I-NEXT: and a3, a4, a3 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: bswap_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev8 a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: bswap_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev8 a0, a0 +; RV64IBP-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %1 +} + +declare i32 @llvm.bitreverse.i32(i32) + +define signext i32 @bitreverse_i32(i32 signext %a) nounwind { +; RV64I-LABEL: bitreverse_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: addi a3, zero, 255 +; RV64I-NEXT: slli a4, a3, 24 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a2, a0, 40 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: addiw a4, a4, -256 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: srli a4, a0, 56 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: and 
a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 24 +; RV64I-NEXT: slli a5, a3, 40 +; RV64I-NEXT: and a4, a4, a5 +; RV64I-NEXT: or a2, a4, a2 +; RV64I-NEXT: slli a4, a0, 40 +; RV64I-NEXT: slli a3, a3, 48 +; RV64I-NEXT: and a3, a4, a3 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 4 +; RV64I-NEXT: lui a2, 1044721 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: lui a2, 1035469 +; RV64I-NEXT: addiw a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: lui a2, 873813 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 33 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; 
RV64I-NEXT: srai a0, a0, 32 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: bitreverse_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 31 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: bitreverse_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 31 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.bitreverse.i64(i64) + +define i64 @bitreverse_i64(i64 %a) nounwind { +; RV64I-LABEL: bitreverse_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: addi a3, zero, 255 +; RV64I-NEXT: slli a4, a3, 24 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a2, a0, 40 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: addiw a4, a4, -256 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: srli a4, a0, 56 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 24 +; RV64I-NEXT: slli a5, a3, 40 +; RV64I-NEXT: and a4, a4, a5 +; RV64I-NEXT: or a2, a4, a2 +; RV64I-NEXT: slli a4, a0, 40 +; RV64I-NEXT: slli a3, a3, 48 +; RV64I-NEXT: and a3, a4, a3 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 4 +; RV64I-NEXT: lui a2, 1044721 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 4 +; RV64I-NEXT: or 
a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: lui a2, 1035469 +; RV64I-NEXT: addiw a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: lui a1, 21845 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: lui a2, 1026731 +; RV64I-NEXT: addiw a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: bitreverse_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: bitreverse_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rev a0, a0 +; RV64IBP-NEXT: ret + %1 = call i64 @llvm.bitreverse.i64(i64 %a) + ret i64 %1 +} + +; There's no [un]shfliw instruction as slliu.w occupies the encoding slot that +; would be occupied by shfliw. 
+ +define i64 @shfl1_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: shfl1_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1035469 +; RV64I-NEXT: addiw a1, a1, -819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -819 +; RV64I-NEXT: slli a1, a1, 13 +; RV64I-NEXT: addi a1, a1, -1639 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: lui a3, 4369 +; RV64I-NEXT: addiw a3, a3, 273 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 273 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: addi a3, a3, 273 +; RV64I-NEXT: slli a4, a3, 14 +; RV64I-NEXT: addi a4, a4, 1092 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: slli a2, a3, 13 +; RV64I-NEXT: addi a2, a2, 546 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: shfl1_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip.n a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl1_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: zip.n a0, a0 +; RV64IBP-NEXT: ret + %and = and i64 %a, -7378697629483820647 + %shl = shl i64 %a, 1 + %and1 = and i64 %shl, 4919131752989213764 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 1 + %and2 = and i64 %shr, 2459565876494606882 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl2_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: shfl2_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1044721 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 14 +; RV64I-NEXT: addi a1, a1, 963 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: lui a3, 48 +; RV64I-NEXT: addiw a3, a3, 771 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: addi a3, a3, 771 +; RV64I-NEXT: slli a4, a3, 16 +; RV64I-NEXT: addi a4, a4, 771 +; RV64I-NEXT: slli a4, a4, 12 +; RV64I-NEXT: addi 
a4, a4, 48 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: slli a2, a3, 14 +; RV64I-NEXT: addi a2, a2, 193 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, -1012 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: shfl2_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip2.b a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl2_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: zip2.b a0, a0 +; RV64IBP-NEXT: ret + %and = and i64 %a, -4340410370284600381 + %shl = shl i64 %a, 2 + %and1 = and i64 %shl, 3472328296227680304 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 2 + %and2 = and i64 %shr, 868082074056920076 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl4_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: shfl4_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1048560 +; RV64I-NEXT: addiw a1, a1, 255 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 255 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 255 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 15 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a0, 4 +; RV64I-NEXT: lui a3, 240 +; RV64I-NEXT: addiw a3, a3, 15 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: addi a3, a3, 15 +; RV64I-NEXT: slli a4, a3, 12 +; RV64I-NEXT: addi a4, a4, 1 +; RV64I-NEXT: slli a4, a4, 12 +; RV64I-NEXT: addi a4, a4, -256 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a0, a0, 4 +; RV64I-NEXT: slli a2, a3, 20 +; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: shfl4_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip4.h a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl4_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: zip4.h a0, a0 +; RV64IBP-NEXT: ret + %and = and i64 %a, -1148435428713435121 + %shl = shl i64 %a, 4 + %and1 = and i64 %shl, 1080880403494997760 + %or = or i64 %and1, %and + %shr = 
lshr i64 %a, 4 + %and2 = and i64 %shr, 67555025218437360 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl8_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: shfl8_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1048560 +; RV64I-NEXT: addiw a1, a1, 1 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: addi a1, a1, 255 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: addi a3, zero, 255 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: addi a4, a4, 255 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: slli a2, a3, 24 +; RV64I-NEXT: addi a2, a2, 1 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: shfl8_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip8.w a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl8_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: zip8.w a0, a0 +; RV64IBP-NEXT: ret + %and = and i64 %a, -72056494543077121 + %shl = shl i64 %a, 8 + %and1 = and i64 %shl, 71776119077928960 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 8 + %and2 = and i64 %shr, 280375465148160 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl16(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: shfl16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, zero, -1 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: addi a1, a1, 1 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: addiw a3, a3, -1 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a0, a0, 16 +; RV64I-NEXT: slli a2, a3, 16 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: shfl16: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip16 a0, a0 +; 
RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl16: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: zip16 a0, a0 +; RV64IBP-NEXT: ret + %and = and i64 %a, -281474976645121 + %shl = shl i64 %a, 16 + %and1 = and i64 %shl, 281470681743360 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 16 + %and2 = and i64 %shr, 4294901760 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbs.ll b/llvm/test/CodeGen/RISCV/rv64Zbs.ll new file mode 100644 index 0000000000000..f7990b36dec86 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbs.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbs -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBS + +define signext i32 @sbclr_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: sbclr_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: sllw a1, a2, a1 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbclr_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbclrw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbclr_i32: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbclrw a0, a0, a1 +; RV64IBS-NEXT: ret + %and = and i32 %b, 31 + %shl = shl nuw i32 1, %and + %neg = xor i32 %shl, -1 + %and1 = and i32 %neg, %a + ret i32 %and1 +} + +define i64 @sbclr_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: sbclr_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: sll a1, a2, a1 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbclr_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbclr a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbclr_i64: +; RV64IBS: # %bb.0: +; 
RV64IBS-NEXT: sbclr a0, a0, a1 +; RV64IBS-NEXT: ret + %and = and i64 %b, 63 + %shl = shl nuw i64 1, %and + %neg = xor i64 %shl, -1 + %and1 = and i64 %neg, %a + ret i64 %and1 +} + +define signext i32 @sbset_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: sbset_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: sllw a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbset_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbsetw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbset_i32: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbsetw a0, a0, a1 +; RV64IBS-NEXT: ret + %and = and i32 %b, 31 + %shl = shl nuw i32 1, %and + %or = or i32 %shl, %a + ret i32 %or +} + +define i64 @sbset_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: sbset_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: sll a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbset_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbset a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbset_i64: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbset a0, a0, a1 +; RV64IBS-NEXT: ret + %conv = and i64 %b, 63 + %shl = shl nuw i64 1, %conv + %or = or i64 %shl, %a + ret i64 %or +} + +define signext i32 @sbinv_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: sbinv_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: sllw a1, a2, a1 +; RV64I-NEXT: xor a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbinv_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbinvw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbinv_i32: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbinvw a0, a0, a1 +; RV64IBS-NEXT: ret + %and = and i32 %b, 31 + %shl = shl nuw i32 1, %and + %xor = xor i32 %shl, %a + ret i32 %xor +} + +define i64 @sbinv_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: sbinv_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: sll a1, a2, a1 +; RV64I-NEXT: xor a0, a1, a0 +; RV64I-NEXT: ret +; +; 
RV64IB-LABEL: sbinv_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbinv a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbinv_i64: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbinv a0, a0, a1 +; RV64IBS-NEXT: ret + %conv = and i64 %b, 63 + %shl = shl nuw i64 1, %conv + %xor = xor i64 %shl, %a + ret i64 %xor +} + +define signext i32 @sbext_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: sbext_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbext_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbextw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbext_i32: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbextw a0, a0, a1 +; RV64IBS-NEXT: ret + %and = and i32 %b, 31 + %shr = lshr i32 %a, %and + %and1 = and i32 %shr, 1 + ret i32 %and1 +} + +define i64 @sbext_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: sbext_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbext_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbext a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbext_i64: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbext a0, a0, a1 +; RV64IBS-NEXT: ret + %conv = and i64 %b, 63 + %shr = lshr i64 %a, %conv + %and1 = and i64 %shr, 1 + ret i64 %and1 +} + +define signext i32 @sbexti_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sbexti_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 5 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbexti_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sbexti a0, a0, 5 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbexti_i32: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbexti a0, a0, 5 +; RV64IBS-NEXT: ret + %shr = lshr i32 %a, 5 + %and = and i32 %shr, 1 + ret i32 %and +} + +define i64 @sbexti_i64(i64 %a) nounwind { +; RV64I-LABEL: sbexti_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 5 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sbexti_i64: +; RV64IB: # %bb.0: +; 
RV64IB-NEXT: sbexti a0, a0, 5 +; RV64IB-NEXT: ret +; +; RV64IBS-LABEL: sbexti_i64: +; RV64IBS: # %bb.0: +; RV64IBS-NEXT: sbexti a0, a0, 5 +; RV64IBS-NEXT: ret + %shr = lshr i64 %a, 5 + %and = and i64 %shr, 1 + ret i64 %and +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll new file mode 100644 index 0000000000000..22e25fadbd910 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll @@ -0,0 +1,266 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBT + +define signext i32 @cmix_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: cmix_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmix_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmix a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmix_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmix a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %and = and i32 %b, %a + %neg = xor i32 %b, -1 + %and1 = and i32 %neg, %c + %or = or i32 %and1, %and + ret i32 %or +} + +define i64 @cmix_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: cmix_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmix_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmix a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmix_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmix a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %and = and i64 %b, %a + %neg = xor i64 %b, -1 + %and1 = and i64 
%neg, %c + %or = or i64 %and1, %and + ret i64 %or +} + +define signext i32 @cmov_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: cmov_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmov_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmov a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmov_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmov a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %tobool.not = icmp eq i32 %b, 0 + %cond = select i1 %tobool.not, i32 %c, i32 %a + ret i32 %cond +} + +define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: cmov_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a1, .LBB3_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmov_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmov a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmov_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmov a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %tobool.not = icmp eq i64 %b, 0 + %cond = select i1 %tobool.not, i64 %c, i64 %a + ret i64 %cond +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: fshl_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 31 +; RV64I-NEXT: beqz a3, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: addi a4, zero, 32 +; RV64I-NEXT: sub a2, a4, a2 +; RV64I-NEXT: srlw a1, a1, a2 +; RV64I-NEXT: sllw a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshl_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fslw a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshl_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fslw a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +declare i64 
@llvm.fshl.i64(i64, i64, i64) + +define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: fshl_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 63 +; RV64I-NEXT: beqz a3, .LBB5_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 64 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshl_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsl a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshl_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsl a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: fshr_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 31 +; RV64I-NEXT: beqz a3, .LBB6_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: srlw a1, a1, a3 +; RV64I-NEXT: addi a3, zero, 32 +; RV64I-NEXT: sub a2, a3, a2 +; RV64I-NEXT: sllw a0, a0, a2 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshr_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsrw a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshr_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsrw a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: fshr_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 63 +; RV64I-NEXT: beqz a3, .LBB7_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: addi a2, zero, 64 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshr_i64: +; RV64IB: # %bb.0: +; 
RV64IB-NEXT: fsr a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshr_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsr a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +define signext i32 @fshri_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: fshri_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a1, 5 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshri_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsriw a0, a0, a1, 5 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshri_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsriw a0, a0, a1, 5 +; RV64IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5) + ret i32 %1 +} + +define i64 @fshri_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: fshri_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a1, 5 +; RV64I-NEXT: slli a0, a0, 59 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshri_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsri a0, a0, a1, 5 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshri_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsri a0, a0, a1, 5 +; RV64IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 5) + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll new file mode 100644 index 0000000000000..c8f733bd6ce93 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll @@ -0,0 +1,324 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple riscv32 -o - %s | FileCheck %s +; This test has been minimized from GCC Torture Suite's regstack-1.c +; and checks that RISCVInstrInfo::storeRegToStackSlot works at the basic +; level. 
+ +@U = external local_unnamed_addr global fp128, align 16 +@Y1 = external local_unnamed_addr global fp128, align 16 +@X = external local_unnamed_addr global fp128, align 16 +@Y = external local_unnamed_addr global fp128, align 16 +@T = external local_unnamed_addr global fp128, align 16 +@S = external local_unnamed_addr global fp128, align 16 + +define void @main() local_unnamed_addr nounwind { +; CHECK-LABEL: main: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -688 +; CHECK-NEXT: sw ra, 684(sp) +; CHECK-NEXT: sw s0, 680(sp) +; CHECK-NEXT: sw s1, 676(sp) +; CHECK-NEXT: sw s2, 672(sp) +; CHECK-NEXT: sw s3, 668(sp) +; CHECK-NEXT: sw s4, 664(sp) +; CHECK-NEXT: sw s5, 660(sp) +; CHECK-NEXT: sw s6, 656(sp) +; CHECK-NEXT: sw s7, 652(sp) +; CHECK-NEXT: sw s8, 648(sp) +; CHECK-NEXT: sw s9, 644(sp) +; CHECK-NEXT: sw s10, 640(sp) +; CHECK-NEXT: sw s11, 636(sp) +; CHECK-NEXT: lui a0, %hi(U) +; CHECK-NEXT: lw s6, %lo(U)(a0) +; CHECK-NEXT: lw s7, %lo(U+4)(a0) +; CHECK-NEXT: lw s8, %lo(U+8)(a0) +; CHECK-NEXT: lw s0, %lo(U+12)(a0) +; CHECK-NEXT: sw zero, 612(sp) +; CHECK-NEXT: sw zero, 608(sp) +; CHECK-NEXT: sw zero, 604(sp) +; CHECK-NEXT: sw zero, 600(sp) +; CHECK-NEXT: sw s0, 596(sp) +; CHECK-NEXT: sw s8, 592(sp) +; CHECK-NEXT: sw s7, 588(sp) +; CHECK-NEXT: addi a0, sp, 616 +; CHECK-NEXT: addi a1, sp, 600 +; CHECK-NEXT: addi a2, sp, 584 +; CHECK-NEXT: sw s6, 584(sp) +; CHECK-NEXT: call __subtf3 +; CHECK-NEXT: lw s3, 616(sp) +; CHECK-NEXT: lw s4, 620(sp) +; CHECK-NEXT: lw s9, 624(sp) +; CHECK-NEXT: lw s11, 628(sp) +; CHECK-NEXT: sw s0, 548(sp) +; CHECK-NEXT: sw s8, 544(sp) +; CHECK-NEXT: sw s7, 540(sp) +; CHECK-NEXT: sw s6, 536(sp) +; CHECK-NEXT: sw s11, 564(sp) +; CHECK-NEXT: sw s9, 560(sp) +; CHECK-NEXT: sw s4, 556(sp) +; CHECK-NEXT: addi a0, sp, 568 +; CHECK-NEXT: addi a1, sp, 552 +; CHECK-NEXT: addi a2, sp, 536 +; CHECK-NEXT: sw s3, 552(sp) +; CHECK-NEXT: call __subtf3 +; CHECK-NEXT: lw a0, 568(sp) +; CHECK-NEXT: sw a0, 40(sp) +; CHECK-NEXT: lw a0, 572(sp) +; CHECK-NEXT: sw 
a0, 32(sp) +; CHECK-NEXT: lw a0, 576(sp) +; CHECK-NEXT: sw a0, 24(sp) +; CHECK-NEXT: lw a0, 580(sp) +; CHECK-NEXT: sw a0, 16(sp) +; CHECK-NEXT: sw zero, 500(sp) +; CHECK-NEXT: sw zero, 496(sp) +; CHECK-NEXT: sw zero, 492(sp) +; CHECK-NEXT: sw zero, 488(sp) +; CHECK-NEXT: sw s0, 516(sp) +; CHECK-NEXT: sw s8, 512(sp) +; CHECK-NEXT: sw s7, 508(sp) +; CHECK-NEXT: addi a0, sp, 520 +; CHECK-NEXT: addi a1, sp, 504 +; CHECK-NEXT: addi a2, sp, 488 +; CHECK-NEXT: sw s6, 504(sp) +; CHECK-NEXT: call __addtf3 +; CHECK-NEXT: lw s2, 520(sp) +; CHECK-NEXT: lw s10, 524(sp) +; CHECK-NEXT: lw s5, 528(sp) +; CHECK-NEXT: lw s1, 532(sp) +; CHECK-NEXT: sw s1, 8(sp) +; CHECK-NEXT: lui a0, %hi(Y1) +; CHECK-NEXT: lw a1, %lo(Y1)(a0) +; CHECK-NEXT: sw a1, 48(sp) +; CHECK-NEXT: lw a2, %lo(Y1+4)(a0) +; CHECK-NEXT: sw a2, 52(sp) +; CHECK-NEXT: lw a3, %lo(Y1+8)(a0) +; CHECK-NEXT: sw a3, 4(sp) +; CHECK-NEXT: lw a0, %lo(Y1+12)(a0) +; CHECK-NEXT: sw a0, 0(sp) +; CHECK-NEXT: sw a0, 308(sp) +; CHECK-NEXT: sw a3, 304(sp) +; CHECK-NEXT: sw a2, 300(sp) +; CHECK-NEXT: sw a1, 296(sp) +; CHECK-NEXT: sw s11, 324(sp) +; CHECK-NEXT: sw s9, 320(sp) +; CHECK-NEXT: sw s4, 316(sp) +; CHECK-NEXT: addi a0, sp, 328 +; CHECK-NEXT: addi a1, sp, 312 +; CHECK-NEXT: addi a2, sp, 296 +; CHECK-NEXT: sw s3, 312(sp) +; CHECK-NEXT: call __multf3 +; CHECK-NEXT: lw a0, 328(sp) +; CHECK-NEXT: sw a0, 44(sp) +; CHECK-NEXT: lw a0, 332(sp) +; CHECK-NEXT: sw a0, 36(sp) +; CHECK-NEXT: lw a0, 336(sp) +; CHECK-NEXT: sw a0, 28(sp) +; CHECK-NEXT: lw a0, 340(sp) +; CHECK-NEXT: sw a0, 20(sp) +; CHECK-NEXT: sw s0, 468(sp) +; CHECK-NEXT: sw s8, 464(sp) +; CHECK-NEXT: sw s7, 460(sp) +; CHECK-NEXT: sw s6, 456(sp) +; CHECK-NEXT: sw s1, 452(sp) +; CHECK-NEXT: sw s5, 448(sp) +; CHECK-NEXT: sw s10, 444(sp) +; CHECK-NEXT: addi a0, sp, 472 +; CHECK-NEXT: addi a1, sp, 456 +; CHECK-NEXT: addi a2, sp, 440 +; CHECK-NEXT: sw s2, 440(sp) +; CHECK-NEXT: call __addtf3 +; CHECK-NEXT: lw a3, 472(sp) +; CHECK-NEXT: lw a0, 476(sp) +; CHECK-NEXT: lw a1, 480(sp) +; 
CHECK-NEXT: lw a2, 484(sp) +; CHECK-NEXT: sw zero, 420(sp) +; CHECK-NEXT: sw zero, 416(sp) +; CHECK-NEXT: sw zero, 412(sp) +; CHECK-NEXT: sw zero, 408(sp) +; CHECK-NEXT: sw a2, 404(sp) +; CHECK-NEXT: sw a1, 400(sp) +; CHECK-NEXT: sw a0, 396(sp) +; CHECK-NEXT: addi a0, sp, 424 +; CHECK-NEXT: addi a1, sp, 408 +; CHECK-NEXT: addi a2, sp, 392 +; CHECK-NEXT: sw a3, 392(sp) +; CHECK-NEXT: call __subtf3 +; CHECK-NEXT: lw a0, 424(sp) +; CHECK-NEXT: lw a1, 436(sp) +; CHECK-NEXT: lw a2, 432(sp) +; CHECK-NEXT: lw a3, 428(sp) +; CHECK-NEXT: lui a4, %hi(X) +; CHECK-NEXT: sw a1, %lo(X+12)(a4) +; CHECK-NEXT: sw a2, %lo(X+8)(a4) +; CHECK-NEXT: sw a3, %lo(X+4)(a4) +; CHECK-NEXT: sw a0, %lo(X)(a4) +; CHECK-NEXT: lw s8, 0(sp) +; CHECK-NEXT: sw s8, 212(sp) +; CHECK-NEXT: lw s7, 4(sp) +; CHECK-NEXT: sw s7, 208(sp) +; CHECK-NEXT: lw a0, 52(sp) +; CHECK-NEXT: sw a0, 204(sp) +; CHECK-NEXT: lw a0, 48(sp) +; CHECK-NEXT: sw a0, 200(sp) +; CHECK-NEXT: lw s6, 16(sp) +; CHECK-NEXT: sw s6, 228(sp) +; CHECK-NEXT: lw s4, 24(sp) +; CHECK-NEXT: sw s4, 224(sp) +; CHECK-NEXT: lw s0, 32(sp) +; CHECK-NEXT: sw s0, 220(sp) +; CHECK-NEXT: addi a0, sp, 232 +; CHECK-NEXT: addi a1, sp, 216 +; CHECK-NEXT: addi a2, sp, 200 +; CHECK-NEXT: lw s1, 40(sp) +; CHECK-NEXT: sw s1, 216(sp) +; CHECK-NEXT: call __multf3 +; CHECK-NEXT: lw a0, 232(sp) +; CHECK-NEXT: sw a0, 12(sp) +; CHECK-NEXT: lw s3, 236(sp) +; CHECK-NEXT: lw s9, 240(sp) +; CHECK-NEXT: lw s11, 244(sp) +; CHECK-NEXT: sw zero, 356(sp) +; CHECK-NEXT: sw zero, 352(sp) +; CHECK-NEXT: sw zero, 348(sp) +; CHECK-NEXT: sw zero, 344(sp) +; CHECK-NEXT: lw a0, 8(sp) +; CHECK-NEXT: sw a0, 372(sp) +; CHECK-NEXT: sw s5, 368(sp) +; CHECK-NEXT: sw s10, 364(sp) +; CHECK-NEXT: addi a0, sp, 376 +; CHECK-NEXT: addi a1, sp, 360 +; CHECK-NEXT: addi a2, sp, 344 +; CHECK-NEXT: sw s2, 360(sp) +; CHECK-NEXT: call __multf3 +; CHECK-NEXT: lw a0, 376(sp) +; CHECK-NEXT: lw a1, 388(sp) +; CHECK-NEXT: lw a2, 384(sp) +; CHECK-NEXT: lw a3, 380(sp) +; CHECK-NEXT: lui a4, %hi(S) +; 
CHECK-NEXT: sw a1, %lo(S+12)(a4) +; CHECK-NEXT: sw a2, %lo(S+8)(a4) +; CHECK-NEXT: sw a3, %lo(S+4)(a4) +; CHECK-NEXT: sw a0, %lo(S)(a4) +; CHECK-NEXT: sw s6, 260(sp) +; CHECK-NEXT: sw s4, 256(sp) +; CHECK-NEXT: sw s0, 252(sp) +; CHECK-NEXT: sw s1, 248(sp) +; CHECK-NEXT: lw a0, 20(sp) +; CHECK-NEXT: sw a0, 276(sp) +; CHECK-NEXT: lw a0, 28(sp) +; CHECK-NEXT: sw a0, 272(sp) +; CHECK-NEXT: lw a0, 36(sp) +; CHECK-NEXT: sw a0, 268(sp) +; CHECK-NEXT: addi a0, sp, 280 +; CHECK-NEXT: addi a1, sp, 264 +; CHECK-NEXT: addi a2, sp, 248 +; CHECK-NEXT: lw a3, 44(sp) +; CHECK-NEXT: sw a3, 264(sp) +; CHECK-NEXT: call __subtf3 +; CHECK-NEXT: lw a0, 280(sp) +; CHECK-NEXT: lw a1, 292(sp) +; CHECK-NEXT: lw a2, 288(sp) +; CHECK-NEXT: lw a3, 284(sp) +; CHECK-NEXT: lui a4, %hi(T) +; CHECK-NEXT: sw a1, %lo(T+12)(a4) +; CHECK-NEXT: sw a2, %lo(T+8)(a4) +; CHECK-NEXT: sw a3, %lo(T+4)(a4) +; CHECK-NEXT: sw a0, %lo(T)(a4) +; CHECK-NEXT: sw zero, 164(sp) +; CHECK-NEXT: sw zero, 160(sp) +; CHECK-NEXT: sw zero, 156(sp) +; CHECK-NEXT: sw zero, 152(sp) +; CHECK-NEXT: sw s11, 180(sp) +; CHECK-NEXT: sw s9, 176(sp) +; CHECK-NEXT: sw s3, 172(sp) +; CHECK-NEXT: addi a0, sp, 184 +; CHECK-NEXT: addi a1, sp, 168 +; CHECK-NEXT: addi a2, sp, 152 +; CHECK-NEXT: lw a3, 12(sp) +; CHECK-NEXT: sw a3, 168(sp) +; CHECK-NEXT: call __addtf3 +; CHECK-NEXT: lw a0, 184(sp) +; CHECK-NEXT: lw a1, 196(sp) +; CHECK-NEXT: lw a2, 192(sp) +; CHECK-NEXT: lw a3, 188(sp) +; CHECK-NEXT: lui a4, %hi(Y) +; CHECK-NEXT: sw a1, %lo(Y+12)(a4) +; CHECK-NEXT: sw a2, %lo(Y+8)(a4) +; CHECK-NEXT: sw a3, %lo(Y+4)(a4) +; CHECK-NEXT: sw a0, %lo(Y)(a4) +; CHECK-NEXT: sw zero, 116(sp) +; CHECK-NEXT: sw zero, 112(sp) +; CHECK-NEXT: sw zero, 108(sp) +; CHECK-NEXT: sw zero, 104(sp) +; CHECK-NEXT: sw s8, 132(sp) +; CHECK-NEXT: sw s7, 128(sp) +; CHECK-NEXT: lw a0, 52(sp) +; CHECK-NEXT: sw a0, 124(sp) +; CHECK-NEXT: addi a0, sp, 136 +; CHECK-NEXT: addi a1, sp, 120 +; CHECK-NEXT: addi a2, sp, 104 +; CHECK-NEXT: lw a3, 48(sp) +; CHECK-NEXT: sw a3, 120(sp) 
+; CHECK-NEXT: call __multf3 +; CHECK-NEXT: lw a3, 136(sp) +; CHECK-NEXT: lw a0, 140(sp) +; CHECK-NEXT: lw a1, 144(sp) +; CHECK-NEXT: lw a2, 148(sp) +; CHECK-NEXT: lui a4, 786400 +; CHECK-NEXT: sw a4, 68(sp) +; CHECK-NEXT: sw zero, 64(sp) +; CHECK-NEXT: sw zero, 60(sp) +; CHECK-NEXT: sw zero, 56(sp) +; CHECK-NEXT: sw a2, 84(sp) +; CHECK-NEXT: sw a1, 80(sp) +; CHECK-NEXT: sw a0, 76(sp) +; CHECK-NEXT: addi a0, sp, 88 +; CHECK-NEXT: addi a1, sp, 72 +; CHECK-NEXT: addi a2, sp, 56 +; CHECK-NEXT: sw a3, 72(sp) +; CHECK-NEXT: call __addtf3 +; CHECK-NEXT: lw a0, 96(sp) +; CHECK-NEXT: lw a1, 100(sp) +; CHECK-NEXT: lw a2, 88(sp) +; CHECK-NEXT: lw a3, 92(sp) +; CHECK-NEXT: lui a4, %hi(Y1) +; CHECK-NEXT: sw a0, %lo(Y1+8)(a4) +; CHECK-NEXT: sw a1, %lo(Y1+12)(a4) +; CHECK-NEXT: sw a2, %lo(Y1)(a4) +; CHECK-NEXT: sw a3, %lo(Y1+4)(a4) +; CHECK-NEXT: lw s11, 636(sp) +; CHECK-NEXT: lw s10, 640(sp) +; CHECK-NEXT: lw s9, 644(sp) +; CHECK-NEXT: lw s8, 648(sp) +; CHECK-NEXT: lw s7, 652(sp) +; CHECK-NEXT: lw s6, 656(sp) +; CHECK-NEXT: lw s5, 660(sp) +; CHECK-NEXT: lw s4, 664(sp) +; CHECK-NEXT: lw s3, 668(sp) +; CHECK-NEXT: lw s2, 672(sp) +; CHECK-NEXT: lw s1, 676(sp) +; CHECK-NEXT: lw s0, 680(sp) +; CHECK-NEXT: lw ra, 684(sp) +; CHECK-NEXT: addi sp, sp, 688 +; CHECK-NEXT: ret + %1 = load fp128, fp128* @U, align 16 + %2 = fsub fp128 0xL00000000000000000000000000000000, %1 + %3 = fsub fp128 %2, %1 + %4 = fadd fp128 %1, 0xL00000000000000000000000000000000 + %5 = load fp128, fp128* @Y1, align 16 + %6 = fmul fp128 %2, %5 + %7 = fadd fp128 %1, %4 + %8 = fsub fp128 0xL00000000000000000000000000000000, %7 + store fp128 %8, fp128* @X, align 16 + %9 = fmul fp128 %3, %5 + %10 = fmul fp128 0xL00000000000000000000000000000000, %4 + store fp128 %10, fp128* @S, align 16 + %11 = fsub fp128 %6, %3 + store fp128 %11, fp128* @T, align 16 + %12 = fadd fp128 0xL00000000000000000000000000000000, %9 + store fp128 %12, fp128* @Y, align 16 + %13 = fmul fp128 0xL00000000000000000000000000000000, %5 + %14 = fadd 
fp128 %13, 0xL0000000000000000BFFE000000000000 + store fp128 %14, fp128* @Y1, align 16 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/tls-models.ll b/llvm/test/CodeGen/RISCV/tls-models.ll index 27f63ff336740..25a2f71beb317 100644 --- a/llvm/test/CodeGen/RISCV/tls-models.ll +++ b/llvm/test/CodeGen/RISCV/tls-models.ll @@ -23,9 +23,10 @@ define i32* @f1() nounwind { ; RV32-PIC: # %bb.0: # %entry ; RV32-PIC-NEXT: addi sp, sp, -16 ; RV32-PIC-NEXT: sw ra, 12(sp) -; RV32-PIC-NEXT: .Ltmp0: +; RV32-PIC-NEXT: .LBB0_1: # %entry +; RV32-PIC-NEXT: # Label of block must be emitted ; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(unspecified) -; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Ltmp0) +; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB0_1) ; RV32-PIC-NEXT: call __tls_get_addr@plt ; RV32-PIC-NEXT: lw ra, 12(sp) ; RV32-PIC-NEXT: addi sp, sp, 16 @@ -35,9 +36,10 @@ define i32* @f1() nounwind { ; RV64-PIC: # %bb.0: # %entry ; RV64-PIC-NEXT: addi sp, sp, -16 ; RV64-PIC-NEXT: sd ra, 8(sp) -; RV64-PIC-NEXT: .Ltmp0: +; RV64-PIC-NEXT: .LBB0_1: # %entry +; RV64-PIC-NEXT: # Label of block must be emitted ; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(unspecified) -; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Ltmp0) +; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB0_1) ; RV64-PIC-NEXT: call __tls_get_addr@plt ; RV64-PIC-NEXT: ld ra, 8(sp) ; RV64-PIC-NEXT: addi sp, sp, 16 @@ -45,17 +47,19 @@ define i32* @f1() nounwind { ; ; RV32-NOPIC-LABEL: f1: ; RV32-NOPIC: # %bb.0: # %entry -; RV32-NOPIC-NEXT: .Ltmp0: +; RV32-NOPIC-NEXT: .LBB0_1: # %entry +; RV32-NOPIC-NEXT: # Label of block must be emitted ; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(unspecified) -; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Ltmp0)(a0) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.LBB0_1)(a0) ; RV32-NOPIC-NEXT: add a0, a0, tp ; RV32-NOPIC-NEXT: ret ; ; RV64-NOPIC-LABEL: f1: ; RV64-NOPIC: # %bb.0: # %entry -; RV64-NOPIC-NEXT: .Ltmp0: +; RV64-NOPIC-NEXT: .LBB0_1: # %entry +; RV64-NOPIC-NEXT: # Label of block must be emitted ; RV64-NOPIC-NEXT: auipc 
a0, %tls_ie_pcrel_hi(unspecified) -; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Ltmp0)(a0) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.LBB0_1)(a0) ; RV64-NOPIC-NEXT: add a0, a0, tp ; RV64-NOPIC-NEXT: ret entry: @@ -70,9 +74,10 @@ define i32* @f2() nounwind { ; RV32-PIC: # %bb.0: # %entry ; RV32-PIC-NEXT: addi sp, sp, -16 ; RV32-PIC-NEXT: sw ra, 12(sp) -; RV32-PIC-NEXT: .Ltmp1: +; RV32-PIC-NEXT: .LBB1_1: # %entry +; RV32-PIC-NEXT: # Label of block must be emitted ; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld) -; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Ltmp1) +; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1) ; RV32-PIC-NEXT: call __tls_get_addr@plt ; RV32-PIC-NEXT: lw ra, 12(sp) ; RV32-PIC-NEXT: addi sp, sp, 16 @@ -82,9 +87,10 @@ define i32* @f2() nounwind { ; RV64-PIC: # %bb.0: # %entry ; RV64-PIC-NEXT: addi sp, sp, -16 ; RV64-PIC-NEXT: sd ra, 8(sp) -; RV64-PIC-NEXT: .Ltmp1: +; RV64-PIC-NEXT: .LBB1_1: # %entry +; RV64-PIC-NEXT: # Label of block must be emitted ; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld) -; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Ltmp1) +; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1) ; RV64-PIC-NEXT: call __tls_get_addr@plt ; RV64-PIC-NEXT: ld ra, 8(sp) ; RV64-PIC-NEXT: addi sp, sp, 16 @@ -92,17 +98,19 @@ define i32* @f2() nounwind { ; ; RV32-NOPIC-LABEL: f2: ; RV32-NOPIC: # %bb.0: # %entry -; RV32-NOPIC-NEXT: .Ltmp1: +; RV32-NOPIC-NEXT: .LBB1_1: # %entry +; RV32-NOPIC-NEXT: # Label of block must be emitted ; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) -; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Ltmp1)(a0) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.LBB1_1)(a0) ; RV32-NOPIC-NEXT: add a0, a0, tp ; RV32-NOPIC-NEXT: ret ; ; RV64-NOPIC-LABEL: f2: ; RV64-NOPIC: # %bb.0: # %entry -; RV64-NOPIC-NEXT: .Ltmp1: +; RV64-NOPIC-NEXT: .LBB1_1: # %entry +; RV64-NOPIC-NEXT: # Label of block must be emitted ; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) -; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Ltmp1)(a0) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.LBB1_1)(a0) ; RV64-NOPIC-NEXT: add a0, a0, 
tp ; RV64-NOPIC-NEXT: ret entry: @@ -115,33 +123,37 @@ entry: define i32* @f3() nounwind { ; RV32-PIC-LABEL: f3: ; RV32-PIC: # %bb.0: # %entry -; RV32-PIC-NEXT: .Ltmp2: +; RV32-PIC-NEXT: .LBB2_1: # %entry +; RV32-PIC-NEXT: # Label of block must be emitted ; RV32-PIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) -; RV32-PIC-NEXT: lw a0, %pcrel_lo(.Ltmp2)(a0) +; RV32-PIC-NEXT: lw a0, %pcrel_lo(.LBB2_1)(a0) ; RV32-PIC-NEXT: add a0, a0, tp ; RV32-PIC-NEXT: ret ; ; RV64-PIC-LABEL: f3: ; RV64-PIC: # %bb.0: # %entry -; RV64-PIC-NEXT: .Ltmp2: +; RV64-PIC-NEXT: .LBB2_1: # %entry +; RV64-PIC-NEXT: # Label of block must be emitted ; RV64-PIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) -; RV64-PIC-NEXT: ld a0, %pcrel_lo(.Ltmp2)(a0) +; RV64-PIC-NEXT: ld a0, %pcrel_lo(.LBB2_1)(a0) ; RV64-PIC-NEXT: add a0, a0, tp ; RV64-PIC-NEXT: ret ; ; RV32-NOPIC-LABEL: f3: ; RV32-NOPIC: # %bb.0: # %entry -; RV32-NOPIC-NEXT: .Ltmp2: +; RV32-NOPIC-NEXT: .LBB2_1: # %entry +; RV32-NOPIC-NEXT: # Label of block must be emitted ; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) -; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Ltmp2)(a0) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.LBB2_1)(a0) ; RV32-NOPIC-NEXT: add a0, a0, tp ; RV32-NOPIC-NEXT: ret ; ; RV64-NOPIC-LABEL: f3: ; RV64-NOPIC: # %bb.0: # %entry -; RV64-NOPIC-NEXT: .Ltmp2: +; RV64-NOPIC-NEXT: .LBB2_1: # %entry +; RV64-NOPIC-NEXT: # Label of block must be emitted ; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) -; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Ltmp2)(a0) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.LBB2_1)(a0) ; RV64-NOPIC-NEXT: add a0, a0, tp ; RV64-NOPIC-NEXT: ret entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll index a00af0d6a9ec4..5fced6ad29e2a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication 
-disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: mul_v16i8 ; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll index dab642b94be05..56343a6d65cb5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -disable-mve-tail-predication=false %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -tail-predication=enabled %s -S -o - | FileCheck %s define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* noalias nocapture readonly %s2, i32 %x, i32* noalias nocapture %d, i32 %n) { ; CHECK-LABEL: @_Z4loopPiPjiS0_i( diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index bf6e92a1c8838..e98276e258abd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* 
noalias nocapture readonly %c, i32 %N) { ; CHECK-LABEL: vpsel_mul_reduce_add: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll new file mode 100644 index 0000000000000..162ccf55d068c --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s +%struct.SpeexPreprocessState_ = type { i32, i32, half*, half* } + +define void @foo(%struct.SpeexPreprocessState_* nocapture readonly %st, i16* %x) { +; CHECK-LABEL: foo: +; CHECK: @ %bb.0: @ %entry +; CHECK: dlstp.16 lr, r4 +; CHECK-NEXT: .LBB0_1: @ %do.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrh.u16 q0, [r2], #16 +; CHECK-NEXT: vstrh.16 q0, [r3], #16 +; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK: dlstp.16 lr, r3 +; CHECK-NEXT: .LBB0_3: @ %do.body6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrh.u16 q1, [r1], #16 +; CHECK-NEXT: vcvt.f16.s16 q1, q1 +; CHECK-NEXT: vmul.f16 q1, q1, q0 +; CHECK-NEXT: vstrh.16 q1, [r0], #16 +; CHECK-NEXT: letp lr, .LBB0_3 +; CHECK-NEXT: @ %bb.4: @ %do.end13 +; CHECK-NEXT: pop {r4, pc} +entry: + %ps_size = getelementptr inbounds %struct.SpeexPreprocessState_, %struct.SpeexPreprocessState_* %st, i32 0, i32 1 + %0 = load i32, i32* %ps_size, align 4 + %mul = shl nsw i32 %0, 1 + %frame_size = getelementptr inbounds %struct.SpeexPreprocessState_, %struct.SpeexPreprocessState_* %st, i32 0, i32 0 + %1 = load i32, i32* %frame_size, align 4 + %sub = sub nsw i32 %mul, %1 + %inbuf = getelementptr inbounds %struct.SpeexPreprocessState_, %struct.SpeexPreprocessState_* %st, i32 0, i32 3 + %2 = load half*, half** %inbuf, align 4 + %frame = getelementptr inbounds %struct.SpeexPreprocessState_, %struct.SpeexPreprocessState_* %st, i32 0, i32 2 + 
%3 = load half*, half** %frame, align 4 + br label %do.body + +do.body: ; preds = %do.body, %entry + %pinbuff16.0 = phi half* [ %2, %entry ], [ %add.ptr, %do.body ] + %blkCnt.0 = phi i32 [ %sub, %entry ], [ %sub2, %do.body ] + %pframef16.0 = phi half* [ %3, %entry ], [ %add.ptr1, %do.body ] + %4 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blkCnt.0) + %5 = bitcast half* %pinbuff16.0 to <8 x half>* + %6 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %5, i32 2, <8 x i1> %4, <8 x half> zeroinitializer) + %7 = bitcast half* %pframef16.0 to <8 x half>* + tail call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %6, <8 x half>* %7, i32 2, <8 x i1> %4) + %add.ptr = getelementptr inbounds half, half* %pinbuff16.0, i32 8 + %add.ptr1 = getelementptr inbounds half, half* %pframef16.0, i32 8 + %sub2 = add nsw i32 %blkCnt.0, -8 + %cmp = icmp sgt i32 %blkCnt.0, 8 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + %8 = load half*, half** %frame, align 4 + %add.ptr4 = getelementptr inbounds half, half* %8, i32 %sub + %9 = load i32, i32* %frame_size, align 4 + br label %do.body6 + +do.body6: ; preds = %do.body6, %do.end + %px.0 = phi i16* [ %x, %do.end ], [ %add.ptr8, %do.body6 ] + %blkCnt.1 = phi i32 [ %9, %do.end ], [ %sub10, %do.body6 ] + %pframef16.1 = phi half* [ %add.ptr4, %do.end ], [ %add.ptr9, %do.body6 ] + %10 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blkCnt.1) + %11 = bitcast i16* %px.0 to <8 x i16>* + %12 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %11, i32 2, <8 x i1> %10, <8 x i16> zeroinitializer) + %13 = tail call fast <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %12, i32 0, <8 x i1> %10, <8 x half> undef) + %14 = tail call fast <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> %13, <8 x half> , <8 x i1> %10, <8 x half> undef) + %15 = bitcast half* %pframef16.1 to <8 x half>* + tail call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %14, <8 x 
half>* %15, i32 2, <8 x i1> %10) + %add.ptr8 = getelementptr inbounds i16, i16* %px.0, i32 8 + %add.ptr9 = getelementptr inbounds half, half* %pframef16.1, i32 8 + %sub10 = add nsw i32 %blkCnt.1, -8 + %cmp12 = icmp sgt i32 %blkCnt.1, 8 + br i1 %cmp12, label %do.body6, label %do.end13 + +do.end13: ; preds = %do.body6 + ret void +} + +declare <8 x i1> @llvm.arm.mve.vctp16(i32) + +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32 immarg, <8 x i1>, <8 x half>) + +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32 immarg, <8 x i1>) + +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) + +declare <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x half>) + +declare <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll index 8d201a23a6898..1fda5c08a0375 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* nocapture readonly %b, i32 %N) { ; CHECK-LABEL: sext_i8: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index 8c18159c24c56..d8d6af3b9a8dc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) { ; CHECK-LABEL: fast_float_mul: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll index bc2c7e084ea7c..fddbfa8b66207 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -disable-mve-tail-predication=false -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s +; RUN: llc -O3 -tail-predication=enabled -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m-arm-none-eabi" diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 6c1273db3f80f..428c703dd341e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) { ; 
CHECK-LABEL: test_acc_scalar_char: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll index 64702cc3c3155..548ba396bed42 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -tail-predication=enabled %s -o - | FileCheck %s define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { ; CHECK-LABEL: @mat_vec_sext_i16( diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index 12c3ca0525f21..66601dd66cb29 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_add_add_v16i8: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll index 13d750310a56c..065e534dd55bd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @foo( diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll index d405657f4d17e..e9facfda61335 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @uadd_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: uadd_sat: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll index d3247a3fd28e7..87f23adf7ffa5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - 
%s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @fabs(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: fabs: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll index 962e9df3dc1e9..e72e81da7e7c1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @round(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %n) #0 { ; CHECK-LABEL: round: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll index 790311a54aa1d..3c7ae4dc734ad 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc 
void @usub_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: usub_sat: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll index 8c1534be77db0..52cd8fdc6d798 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; TODO: We should be able to generate a vctp for the loads. ; CHECK-LABEL: trunc_v4i32_v4i16 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll index 1926bbeeaa70f..8e46e3385385e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; The following functions should all fail to become tail-predicated. 
; CHECK-NOT: call i32 @llvm.arm.vctp diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll index 3a9d3d1171266..b40b36ced4af2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: expand_v8i16_v8i32 ; CHECK-NOT: call i32 @llvm.arm.mve.vctp diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll index 5c753134744d6..f3055bc8a575f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll @@ -1,6 +1,6 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false \ -; RUN: -force-mve-tail-predication -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=force-enabled \ +; RUN: -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE ; CHECK-LABEL: reduction_i32 ; CHECK: phi i32 [ 0, %vector.ph ] diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll index f1242db364851..4db17c074643f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr { ; CHECK-LABEL: varying_outer_2d_reduction: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll index 26a570ac4c29b..615334300c283 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { ; CHECK-LABEL: mul_reduce_add: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll index aaeae75e072f7..e10cc3153b9c9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled 
-mattr=+mve %s -S -o - | FileCheck %s ; CHECK-LABEL: vec_mul_reduce_add diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll index 19d9c89dabca8..f1a35af8b57ed 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; TODO: The unrolled pattern is preventing the transform ; CHECK-LABEL: mul_v16i8_unroll diff --git a/llvm/test/CodeGen/Thumb2/csel.ll b/llvm/test/CodeGen/Thumb2/csel.ll index f2cf3e839a805..5a56fb6f692da 100644 --- a/llvm/test/CodeGen/Thumb2/csel.ll +++ b/llvm/test/CodeGen/Thumb2/csel.ll @@ -107,9 +107,7 @@ define i32 @csel_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: csel_var: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r0, #45 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r1, r2 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: csel r0, r1, r2, gt ; CHECK-NEXT: bx lr entry: %cmp = icmp sgt i32 %a, 45 diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll index acafde53ac830..a691fd553665c 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -127,9 +127,10 @@ define double @copysign_d(double %a, double %b) { ; SOFT: bfi r1, [[REG]], #31, #1 ; VFP: lsrs [[REG:r[0-9]+]], r3, #31 ; VFP: bfi r1, [[REG]], #31, #1 -; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000 -; NEON: vshl.i64 [[REG]], [[REG]], #32 -; NEON: vbsl [[REG]], d +; NEON: vmov.i32 d16, #0x80000000 +; NEON-NEXT: vshl.i64 d16, d16, #32 +; NEON-NEXT: vbit d0, d1, d16 +; NEON-NEXT: bx lr %1 = call double @llvm.copysign.f64(double %a, double 
%b) ret double %1 } diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll index 1263ae15b4664..ac55b00ddfb1d 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m33 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=NO-VMLA ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP -check-prefix=FP-ARMv8 -check-prefix=VMLA ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 -check-prefix=NO-VMLA -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8 -check-prefix=VMLA +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON-A7 -check-prefix=VFP4 -check-prefix=NO-VMLA +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON-A57 -check-prefix=FP-ARMv8 -check-prefix=VMLA declare float @llvm.sqrt.f32(float %Val) define float @sqrt_f(float %a) { @@ -123,8 +123,20 @@ define float @copysign_f(float %a, float %b) { ; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1 ; VFP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31 ; VFP: bfi r{{[0-9]+}}, [[REG]], #31, #1 -; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000 -; NEON: vbsl [[REG]], d +; NEON-A7: @ %bb.0: +; 
NEON-A7-NEXT: vmov.f32 s2, s1 +; NEON-A7-NEXT: @ kill: def $s0 killed $s0 def $d0 +; NEON-A7-NEXT: vmov.i32 d16, #0x80000000 +; NEON-A7-NEXT: vbit d0, d1, d16 +; NEON-A7-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; NEON-A7-NEXT: bx lr +; NEON-A57: @ %bb.0: +; NEON-A57-NEXT: vmov.f32 s2, s1 +; NEON-A57-NEXT: vmov.i32 d16, #0x80000000 +; NEON-A57-NEXT: @ kill: def $s0 killed $s0 def $d0 +; NEON-A57-NEXT: vbit d0, d1, d16 +; NEON-A57-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; NEON-A57-NEXT: bx lr %1 = call float @llvm.copysign.f32(float %a, float %b) ret float %1 } diff --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll index fdd1b659d0075..709bd49f22860 100644 --- a/llvm/test/CodeGen/Thumb2/float-ops.ll +++ b/llvm/test/CodeGen/Thumb2/float-ops.ll @@ -278,8 +278,10 @@ define double @select_d(double %a, double %b, i1 %c) { ; CHECK-LABEL: select_d: ; NONE: ldr{{(.w)?}} [[REG:r[0-9]+]], [sp] ; NONE: ands [[REG]], [[REG]], #1 -; NONE-DAG: moveq r0, r2 -; NONE-DAG: moveq r1, r3 +; NOREGS-DAG: moveq r0, r2 +; NOREGS-DAG: moveq r1, r3 +; ONLYREGS-DAG: csel r0, r0, r2 +; ONLYREGS-DAG: csel r1, r1, r3 ; SP: ands r0, r0, #1 ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0 ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1 diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll index 29878063a8ca6..0b5dcbced1a56 100644 --- a/llvm/test/CodeGen/Thumb2/mve-abs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -42,33 +42,30 @@ define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: vmov lr, s4 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: rsbs.w r3, lr, #0 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: rsbs.w lr, r1, #0 ; CHECK-NEXT: sbc.w r2, r12, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: ands r1, r1, #1 -; 
CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r2, r0 -; CHECK-NEXT: moveq r3, lr -; CHECK-NEXT: vmov lr, s6 -; CHECK-NEXT: vmov.32 q0[0], r3 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.32 q0[1], r2 -; CHECK-NEXT: rsbs.w r2, lr, #0 -; CHECK-NEXT: sbc.w r3, r12, r0 +; CHECK-NEXT: cset r3, mi +; CHECK-NEXT: ands r3, r3, #1 +; CHECK-NEXT: csel r1, lr, r1, ne +; CHECK-NEXT: csel r0, r2, r0, ne +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: rsbs r2, r1, #0 +; CHECK-NEXT: sbc.w r12, r12, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: ands r1, r1, #1 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq r2, lr -; CHECK-NEXT: vmov.32 q0[2], r2 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: vmov.32 q0[3], r3 +; CHECK-NEXT: cset r3, mi +; CHECK-NEXT: ands r3, r3, #1 +; CHECK-NEXT: csel r1, r2, r1, ne +; CHECK-NEXT: csel r0, r12, r0, ne +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: pop {r7, pc} entry: %0 = icmp slt <2 x i64> %s1, zeroinitializer diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index 42e8cc91ede8e..e8ab7792b6dfc 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -796,23 +796,23 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: ldrh r5, [r0] -; CHECK-NEXT: ldr.w r12, [r0, #4] +; CHECK-NEXT: ldr.w r9, [r0, #4] ; CHECK-NEXT: subs r6, r5, #1 ; CHECK-NEXT: cmp r6, #3 ; CHECK-NEXT: bhi .LBB15_6 ; CHECK-NEXT: @ %bb.1: @ %if.then ; CHECK-NEXT: ldr r7, [r0, #8] -; CHECK-NEXT: add.w r4, r12, r6, lsl #1 +; CHECK-NEXT: add.w r4, r9, r6, lsl #1 ; CHECK-NEXT: lsr.w lr, r3, #2 ; CHECK-NEXT: ldrh.w r8, [r7, #6] -; CHECK-NEXT: ldrh.w r9, [r7, #4] +; CHECK-NEXT: ldrh.w r12, [r7, #4] ; 
CHECK-NEXT: ldrh r6, [r7, #2] ; CHECK-NEXT: ldrh r7, [r7] ; CHECK-NEXT: wls lr, lr, .LBB15_5 ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: bic r5, r3, #3 -; CHECK-NEXT: add.w r10, r12, #2 +; CHECK-NEXT: add.w r10, r9, #2 ; CHECK-NEXT: str r5, [sp] @ 4-byte Spill ; CHECK-NEXT: add.w r5, r2, r5, lsl #1 ; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill @@ -828,7 +828,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: vmul.f16 q0, q0, r7 ; CHECK-NEXT: vfma.f16 q0, q1, r6 ; CHECK-NEXT: vldrw.u32 q1, [r5] -; CHECK-NEXT: vfma.f16 q0, q1, r9 +; CHECK-NEXT: vfma.f16 q0, q1, r12 ; CHECK-NEXT: vldrw.u32 q1, [r10, #4] ; CHECK-NEXT: add.w r10, r10, #8 ; CHECK-NEXT: vfma.f16 q0, q1, r8 @@ -838,7 +838,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload ; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: add.w r12, r12, r2, lsl #1 +; CHECK-NEXT: add.w r9, r9, r2, lsl #1 ; CHECK-NEXT: add.w r1, r1, r2, lsl #1 ; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: .LBB15_5: @ %while.end @@ -847,35 +847,35 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: vctp.16 lr ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r4] -; CHECK-NEXT: vldrw.u32 q0, [r12] -; CHECK-NEXT: add.w r1, r12, #2 +; CHECK-NEXT: vldrw.u32 q0, [r9] +; CHECK-NEXT: add.w r1, r9, #2 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: add.w r1, r12, #6 +; CHECK-NEXT: add.w r1, r9, #6 ; CHECK-NEXT: vmul.f16 q0, q0, r7 ; CHECK-NEXT: vfma.f16 q0, q1, r6 -; CHECK-NEXT: vldrw.u32 q1, [r12, #4] -; CHECK-NEXT: vfma.f16 q0, q1, r9 +; CHECK-NEXT: vldrw.u32 q1, [r9, #4] +; CHECK-NEXT: vfma.f16 q0, q1, r12 ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vfma.f16 q0, q1, r8 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r2] -; CHECK-NEXT: ldr.w 
r12, [r0, #4] +; CHECK-NEXT: ldr.w r9, [r0, #4] ; CHECK-NEXT: .LBB15_6: @ %if.end -; CHECK-NEXT: add.w r0, r12, r3, lsl #1 +; CHECK-NEXT: add.w r0, r9, r3, lsl #1 ; CHECK-NEXT: lsr.w lr, r5, #2 ; CHECK-NEXT: wls lr, lr, .LBB15_10 ; CHECK-NEXT: @ %bb.7: @ %while.body51.preheader ; CHECK-NEXT: bic r2, r5, #3 ; CHECK-NEXT: adds r1, r2, r3 -; CHECK-NEXT: mov r3, r12 -; CHECK-NEXT: add.w r1, r12, r1, lsl #1 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: add.w r1, r9, r1, lsl #1 ; CHECK-NEXT: .LBB15_8: @ %while.body51 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #8 ; CHECK-NEXT: vstrb.8 q0, [r3], #8 ; CHECK-NEXT: le lr, .LBB15_8 ; CHECK-NEXT: @ %bb.9: @ %while.end55.loopexit -; CHECK-NEXT: add.w r12, r12, r2, lsl #1 +; CHECK-NEXT: add.w r9, r9, r2, lsl #1 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: .LBB15_10: @ %while.end55 ; CHECK-NEXT: ands r1, r5, #3 @@ -884,7 +884,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: vctp.16 r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r12] +; CHECK-NEXT: vstrht.16 q0, [r9] ; CHECK-NEXT: .LBB15_12: @ %if.end61 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll index 0ba224415b67e..306f31be27f96 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc void @fmas1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, 
float %a, i32 %n) { ; CHECK-LABEL: fmas1: diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 291c13543d14d..9897b607d6b3a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -125,14 +125,12 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: moveq r4, r1 +; CHECK-NEXT: csel r4, r4, r1, ne +; CHECK-NEXT: csel r3, r3, r0, ne ; CHECK-NEXT: subs r5, r4, r2 ; CHECK-NEXT: sbcs r3, r3, #0 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r4, r2 -; CHECK-NEXT: str r4, [r11], #4 +; CHECK-NEXT: csel r3, r4, r2, lt +; CHECK-NEXT: str r3, [r11], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #8 @@ -406,22 +404,20 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r12], #4 ; CHECK-NEXT: ldr r4, [r10], #4 -; CHECK-NEXT: smull r4, r5, r4, r2 -; CHECK-NEXT: asrl r4, r5, #31 -; CHECK-NEXT: subs r2, r1, r4 -; CHECK-NEXT: sbcs.w r2, r0, r5 -; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: smull r2, r5, r4, r2 +; CHECK-NEXT: asrl r2, r5, #31 +; CHECK-NEXT: subs r4, r1, r2 +; CHECK-NEXT: sbcs.w r4, r0, r5 +; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: subs r2, r4, r3 -; CHECK-NEXT: sbcs r2, r5, #0 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r4, r3 -; CHECK-NEXT: str r4, [r11], #4 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r2, r1, ne +; CHECK-NEXT: csel r4, r5, r0, ne +; CHECK-NEXT: subs r5, r2, r3 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: csel r2, r2, r3, lt +; CHECK-NEXT: str r2, 
[r11], #4 ; CHECK-NEXT: le lr, .LBB1_7 ; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #8 @@ -1158,9 +1154,8 @@ define arm_aapcs_vfpcc void @ssatmul_4_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: it lt ; CHECK-NEXT: asrlt r3, r2, #15 ; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, r1 -; CHECK-NEXT: strh r3, [r4], #2 +; CHECK-NEXT: csel r2, r3, r1, lt +; CHECK-NEXT: strh r2, [r4], #2 ; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1300,9 +1295,8 @@ define arm_aapcs_vfpcc void @ssatmul_8_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: it lt ; CHECK-NEXT: asrlt r3, r2, #15 ; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, r1 -; CHECK-NEXT: strh r3, [r4], #2 +; CHECK-NEXT: csel r2, r3, r1, lt +; CHECK-NEXT: strh r2, [r4], #2 ; CHECK-NEXT: le lr, .LBB6_7 ; CHECK-NEXT: .LBB6_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1439,9 +1433,8 @@ define arm_aapcs_vfpcc void @ssatmul_8i_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: it lt ; CHECK-NEXT: asrlt r3, r2, #15 ; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, r1 -; CHECK-NEXT: strh r3, [r4], #2 +; CHECK-NEXT: csel r2, r3, r1, lt +; CHECK-NEXT: strh r2, [r4], #2 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll index 8457a3ab7a169..7313cb66c9c9b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -34,78 +34,67 @@ entry: define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: sadd_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r5, s0 -; 
CHECK-NEXT: vmov r8, s5 -; CHECK-NEXT: vmov r4, s1 -; CHECK-NEXT: vmov r7, s2 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: vmov r6, s3 -; CHECK-NEXT: adds.w r12, r5, r0 -; CHECK-NEXT: adc.w r0, r4, r8 -; CHECK-NEXT: asrs r2, r0, #31 -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.32 q2[1], r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: adds.w lr, r7, r2 -; CHECK-NEXT: adc.w r2, r6, r3 -; CHECK-NEXT: subs.w r5, r12, r5 -; CHECK-NEXT: sbcs.w r4, r0, r4 -; CHECK-NEXT: asr.w r1, r2, #31 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #1 -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: adr r1, .LCPI3_0 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: adr r1, .LCPI3_1 -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vbic q0, q0, q2 -; CHECK-NEXT: csetm r4, ne -; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vorr q0, q1, q0 -; CHECK-NEXT: vmov.32 q1[0], r4 -; CHECK-NEXT: vmov.32 q1[1], r4 -; CHECK-NEXT: subs.w r4, lr, r7 -; CHECK-NEXT: sbcs.w r4, r2, r6 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: vmov.32 q1[2], r1 -; CHECK-NEXT: vmov.32 q1[3], r1 -; CHECK-NEXT: asr.w r1, r8, #31 +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vmov r0, s5 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov lr, s4 +; CHECK-NEXT: vmov r4, s2 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: cset r1, gt +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: cset r3, gt +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: cset r12, eq +; CHECK-NEXT: adds.w r1, r1, lr +; CHECK-NEXT: adcs r2, r0 +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: cset r0, gt +; CHECK-NEXT: cmp r3, r0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: and.w r0, r0, r12 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: and r3, r0, #1 +; CHECK-NEXT: cset r0, mi +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: 
cinv r0, r12, eq +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r1, r2, #31 +; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: asrs r1, r3, #31 -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: veor q1, q2, q1 -; CHECK-NEXT: vmov.32 q2[0], r12 +; CHECK-NEXT: vmov r2, s3 ; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov.32 q2[2], lr -; CHECK-NEXT: vmov.32 q2[3], r2 -; CHECK-NEXT: vbic q1, q2, q1 -; CHECK-NEXT: vorr q0, q0, q1 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI3_0: -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 2147483648 @ 0x80000000 -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 2147483648 @ 0x80000000 -; CHECK-NEXT: .LCPI3_1: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 2147483647 @ 0x7fffffff -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: cset r1, gt +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: cset r3, gt +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: cset lr, eq +; CHECK-NEXT: adds r1, r1, r4 +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: cset r2, gt +; CHECK-NEXT: cmp r3, r2 +; CHECK-NEXT: cset r2, ne +; CHECK-NEXT: and.w r2, r2, lr +; CHECK-NEXT: ands r2, r2, #1 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r1, r0, #31 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: cset r1, mi +; CHECK-NEXT: tst.w r1, #1 +; CHECK-NEXT: cinv r1, r12, eq +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r0, r1, r0, ne +; CHECK-NEXT: vmov.32 q2[3], r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: pop {r4, pc} entry: %0 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 @@ -144,44 +133,34 @@ entry: define 
arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: uadd_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vmov r3, s0 ; CHECK-NEXT: vmov r0, s5 ; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: adds.w lr, r3, r2 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adcs r1, r12, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: movne.w r2, #-1 +; CHECK-NEXT: vmov.32 q2[0], r2 ; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: adc.w r12, r1, r0 -; CHECK-NEXT: subs.w r3, lr, r3 -; CHECK-NEXT: sbcs.w r1, r12, r1 -; CHECK-NEXT: vmov r3, s3 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: vmov.32 q0[0], lr -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.32 q0[1], r12 -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov r1, s7 -; CHECK-NEXT: adds r2, r2, r4 -; CHECK-NEXT: vmov.32 q0[2], r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: subs r4, r2, r4 -; CHECK-NEXT: sbcs.w r3, r1, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.32 q0[3], r1 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adcs r1, r12, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r2, #-1 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 ; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vorr q0, q0, q2 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 @@ -221,93 +200,67 @@ entry: 
define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: ssub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: vmov lr, s5 -; CHECK-NEXT: vmov r12, s7 -; CHECK-NEXT: vmov r5, s0 -; CHECK-NEXT: vmov r4, s1 -; CHECK-NEXT: rsbs r3, r2, #0 -; CHECK-NEXT: sbcs.w r3, r0, lr -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: vmov.32 q2[0], r3 -; CHECK-NEXT: vmov.32 q2[1], r3 -; CHECK-NEXT: vmov r3, s6 -; CHECK-NEXT: rsbs r1, r3, #0 -; CHECK-NEXT: sbcs.w r1, r0, r12 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: subs r6, r5, r2 -; CHECK-NEXT: vmov.32 q2[2], r1 -; CHECK-NEXT: vmov.32 q2[3], r1 -; CHECK-NEXT: sbc.w r1, r4, lr -; CHECK-NEXT: subs r5, r6, r5 -; CHECK-NEXT: sbcs.w r5, r1, r4 +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vmov r0, s5 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov lr, s4 ; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne -; CHECK-NEXT: vmov.32 q1[0], r5 -; CHECK-NEXT: vmov.32 q1[1], r5 -; CHECK-NEXT: vmov r5, s3 -; CHECK-NEXT: subs r3, r4, r3 -; CHECK-NEXT: sbc.w r2, r5, r12 -; CHECK-NEXT: subs r4, r3, r4 -; CHECK-NEXT: sbcs.w r5, r2, r5 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov.32 q1[2], r0 -; CHECK-NEXT: vmov.32 q1[3], r0 -; CHECK-NEXT: asrs r0, r1, #31 -; CHECK-NEXT: veor q0, q2, q1 -; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: cset r1, gt +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: 
cset r3, gt +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: cset r12, ne +; CHECK-NEXT: subs.w r1, r1, lr +; CHECK-NEXT: sbcs r2, r0 +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: cset r0, gt +; CHECK-NEXT: cmp r3, r0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: and.w r0, r0, r12 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: and r3, r0, #1 +; CHECK-NEXT: cset r0, mi +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: cinv r0, r12, eq +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r1, r2, #31 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: vmov.32 q2[0], r1 +; CHECK-NEXT: vmov r2, s3 ; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: asrs r0, r2, #31 -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.32 q1[0], r6 +; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: cset r1, gt +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: cset r3, gt +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: cset lr, ne +; CHECK-NEXT: subs r1, r4, r1 +; CHECK-NEXT: sbc.w r0, r2, r0 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: cset r2, gt +; CHECK-NEXT: cmp r3, r2 +; CHECK-NEXT: cset r2, ne +; CHECK-NEXT: and.w r2, r2, lr +; CHECK-NEXT: ands r2, r2, #1 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r1, r0, #31 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: cset r1, mi +; CHECK-NEXT: tst.w r1, #1 +; CHECK-NEXT: cinv r1, r12, eq +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r0, r1, r0, ne ; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: adr r0, .LCPI11_0 -; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: adr r0, .LCPI11_1 -; CHECK-NEXT: vldrw.u32 q4, [r0] -; CHECK-NEXT: vmov.32 q1[1], r1 -; CHECK-NEXT: vmov.32 q1[2], r3 -; CHECK-NEXT: vbic q3, q3, q2 -; CHECK-NEXT: vand q2, q4, q2 -; CHECK-NEXT: vmov.32 q1[3], r2 -; CHECK-NEXT: vorr q2, q2, q3 -; CHECK-NEXT: vbic q1, q1, q0 -; CHECK-NEXT: vand q0, q2, q0 -; CHECK-NEXT: vorr q0, q0, q1 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, 
r5, r6, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI11_0: -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 2147483648 @ 0x80000000 -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 2147483648 @ 0x80000000 -; CHECK-NEXT: .LCPI11_1: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 2147483647 @ 0x7fffffff -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: pop {r4, pc} entry: %0 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 @@ -346,44 +299,36 @@ entry: define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: usub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vmov r3, s0 ; CHECK-NEXT: vmov r0, s5 ; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: subs.w lr, r3, r2 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: sbcs.w r0, r1, r0 +; CHECK-NEXT: adc r1, r12, #0 +; CHECK-NEXT: rsbs.w r1, r1, #1 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r0, #0 +; CHECK-NEXT: movne r2, #0 +; CHECK-NEXT: vmov.32 q2[0], r2 ; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: sbc.w r12, r1, r0 -; CHECK-NEXT: subs.w r3, r3, lr -; CHECK-NEXT: sbcs.w r1, r1, r12 -; CHECK-NEXT: vmov r3, s3 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: vmov.32 q0[0], lr -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.32 q0[1], r12 -; CHECK-NEXT: vmov.32 q2[1], r1 -; CHECK-NEXT: vmov r1, s7 -; CHECK-NEXT: subs r2, r4, r2 -; CHECK-NEXT: vmov.32 q0[2], r2 -; CHECK-NEXT: sbc.w r1, r3, r1 -; CHECK-NEXT: subs r4, r4, r2 -; CHECK-NEXT: sbcs r3, r1 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: 
vmov.32 q0[3], r1 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov.32 q2[2], r0 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: sbcs.w r0, r1, r0 +; CHECK-NEXT: adc r1, r12, #0 +; CHECK-NEXT: rsbs.w r1, r1, #1 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r2, #0 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #0 ; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vbic q0, q0, q2 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr entry: %0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vctp.ll b/llvm/test/CodeGen/Thumb2/mve-vctp.ll index d6e4d492f5351..67bc161e02c64 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vctp.ll @@ -4,8 +4,8 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { ; CHECK-LABEL: vctp8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vctp.8 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.8 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] @@ -20,8 +20,8 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { ; CHECK-LABEL: vctp16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vctp.16 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.16 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] @@ -36,8 +36,8 @@ define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { define void @vctp32(i32 %arg, <4 x i32> *%in, <4 x i32>* %out) { ; CHECK-LABEL: vctp32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vctp.32 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.32 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll 
b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 29e441e3e90cf..0d22a7f3cd99d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -732,8 +732,7 @@ define i32 @smin_i32(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r2, r1 +; CHECK-NEXT: csel r2, r2, r1, lt ; CHECK-NEXT: le lr, .LBB7_8 ; CHECK-NEXT: .LBB7_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -819,8 +818,7 @@ define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: mvn r4, #-2147483648 ; CHECK-NEXT: vminv.s32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r0, r4 +; CHECK-NEXT: csel r0, r0, r4, lt ; CHECK-NEXT: le lr, .LBB8_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -834,8 +832,7 @@ define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r0, r2 +; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: le lr, .LBB8_8 ; CHECK-NEXT: .LBB8_9: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -933,8 +930,7 @@ define i32 @smax_i32(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r2, r1 +; CHECK-NEXT: csel r2, r2, r1, gt ; CHECK-NEXT: le lr, .LBB9_8 ; CHECK-NEXT: .LBB9_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -1020,8 +1016,7 @@ define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: mov.w r4, #-2147483648 ; CHECK-NEXT: vmaxv.s32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r0, r4 +; CHECK-NEXT: csel r0, r0, r4, gt ; CHECK-NEXT: le lr, .LBB10_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; 
CHECK-NEXT: cmp r3, r1 @@ -1035,8 +1030,7 @@ define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r0, r2 +; CHECK-NEXT: csel r0, r0, r2, gt ; CHECK-NEXT: le lr, .LBB10_8 ; CHECK-NEXT: .LBB10_9: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -1134,8 +1128,7 @@ define i32 @umin_i32(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it hs -; CHECK-NEXT: movhs r2, r1 +; CHECK-NEXT: csel r2, r2, r1, lo ; CHECK-NEXT: le lr, .LBB11_8 ; CHECK-NEXT: .LBB11_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -1221,8 +1214,7 @@ define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: mov.w r4, #-1 ; CHECK-NEXT: vminv.u32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it hs -; CHECK-NEXT: movhs r0, r4 +; CHECK-NEXT: csel r0, r0, r4, lo ; CHECK-NEXT: le lr, .LBB12_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -1236,8 +1228,7 @@ define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r2 +; CHECK-NEXT: csel r0, r0, r2, hi ; CHECK-NEXT: le lr, .LBB12_8 ; CHECK-NEXT: .LBB12_9: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -1335,8 +1326,7 @@ define i32 @umax_i32(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r2, r1 +; CHECK-NEXT: csel r2, r2, r1, hi ; CHECK-NEXT: le lr, .LBB13_8 ; CHECK-NEXT: .LBB13_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -1418,8 +1408,7 @@ define i32 @umax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: 
vmaxv.u32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r4 +; CHECK-NEXT: csel r0, r0, r4, hi ; CHECK-NEXT: le lr, .LBB14_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -1433,8 +1422,7 @@ define i32 @umax_i32_inloop(i32* nocapture readonly %x, i32 %n) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r2 +; CHECK-NEXT: csel r0, r0, r2, hi ; CHECK-NEXT: le lr, .LBB14_6 ; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll index 36c201cced56c..eca5f44904a16 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll @@ -145,8 +145,7 @@ define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-NEXT: vmaxv.s8 r1, q0 ; CHECK-NEXT: sxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) %c = icmp sgt i8 %r, %s2 @@ -161,8 +160,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: vmaxv.s8 r1, q0 ; CHECK-NEXT: sxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) %rs = sext i8 %r to i32 @@ -180,8 +178,7 @@ define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-NEXT: vmaxv.s16 r1, q0 ; CHECK-NEXT: sxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) %c = icmp sgt i16 %r, %s2 @@ -197,8 +194,7 @@ define arm_aapcs_vfpcc i32 
@vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: vmaxv.s16 r1, q0 ; CHECK-NEXT: sxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) %rs = sext i16 %r to i32 @@ -213,8 +209,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-NEXT: mov.w r1, #-2147483648 ; CHECK-NEXT: vmaxv.s32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1) %c = icmp sgt i32 %r, %s2 @@ -230,8 +225,7 @@ define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-NEXT: vmaxv.u8 r1, q0 ; CHECK-NEXT: uxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) %c = icmp ugt i8 %r, %s2 @@ -246,8 +240,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: vmaxv.u8 r1, q0 ; CHECK-NEXT: uxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) %rs = zext i8 %r to i32 @@ -264,8 +257,7 @@ define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-NEXT: vmaxv.u16 r1, q0 ; CHECK-NEXT: uxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) %c = icmp ugt i16 %r, %s2 @@ -280,8 +272,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: vmaxv.u16 r1, q0 ; CHECK-NEXT: uxth r1, r1 ; 
CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) %rs = zext i16 %r to i32 @@ -296,8 +287,7 @@ define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: vmaxv.u32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1) %c = icmp ugt i32 %r, %s2 @@ -313,8 +303,7 @@ define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-NEXT: vminv.s8 r1, q0 ; CHECK-NEXT: sxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) %c = icmp slt i8 %r, %s2 @@ -329,8 +318,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: vminv.s8 r1, q0 ; CHECK-NEXT: sxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) %rs = sext i8 %r to i32 @@ -347,8 +335,7 @@ define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-NEXT: vminv.s16 r1, q0 ; CHECK-NEXT: sxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) %c = icmp slt i16 %r, %s2 @@ -363,8 +350,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: vminv.s16 r1, q0 ; CHECK-NEXT: sxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx 
lr %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) %rs = sext i16 %r to i32 @@ -379,8 +365,7 @@ define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-NEXT: mvn r1, #-2147483648 ; CHECK-NEXT: vminv.s32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1) %c = icmp slt i32 %r, %s2 @@ -396,8 +381,7 @@ define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) { ; CHECK-NEXT: vminv.u8 r1, q0 ; CHECK-NEXT: uxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) %c = icmp ult i8 %r, %s2 @@ -412,8 +396,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) { ; CHECK-NEXT: vminv.u8 r1, q0 ; CHECK-NEXT: uxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) %rs = zext i8 %r to i32 @@ -430,8 +413,7 @@ define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) { ; CHECK-NEXT: vminv.u16 r1, q0 ; CHECK-NEXT: uxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) %c = icmp ult i16 %r, %s2 @@ -446,8 +428,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) { ; CHECK-NEXT: vminv.u16 r1, q0 ; CHECK-NEXT: uxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) %rs = zext i16 %r to i32 @@ -462,8 
+443,7 @@ define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) { ; CHECK-NEXT: mov.w r1, #-1 ; CHECK-NEXT: vminv.u32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) %c = icmp ult i32 %r, %s2 diff --git a/llvm/test/CodeGen/WebAssembly/function-pointer64.ll b/llvm/test/CodeGen/WebAssembly/function-pointer64.ll new file mode 100644 index 0000000000000..0bea3930de0af --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/function-pointer64.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -asm-verbose=false -O2 | FileCheck %s +; RUN: llc < %s -asm-verbose=false -O2 --filetype=obj | obj2yaml | FileCheck --check-prefix=YAML %s + +; This tests pointer features that may codegen differently in wasm64. + +target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128" +target triple = "wasm64-unknown-unknown" + +define void @bar(i32 %n) { +entry: + ret void +} + +define void @foo(void (i32)* %fp) { +entry: + call void %fp(i32 1) + ret void +} + +define void @test() { +entry: + call void @foo(void (i32)* @bar) + store void (i32)* @bar, void (i32)** @fptr + ret void +} + +@fptr = global void (i32)* @bar + +; For simplicity (and compatibility with UB C/C++ code) we keep all types +; of pointers the same size, so function pointers (which are 32-bit indices +; in Wasm) are represented as 64-bit until called. + +; CHECK: .functype foo (i64) -> () +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: call_indirect (i32) -> () + +; CHECK: .functype test () -> () +; CHECK-NEXT: i64.const bar +; CHECK-NEXT: call foo + + +; Check we're emitting a 64-bit reloc for `i64.const bar` and the global. 
+ +; YAML: Memory: +; YAML-NEXT: Flags: [ IS_64 ] +; YAML-NEXT: Initial: 0x00000001 + +; YAML: - Type: CODE +; YAML: - Type: R_WASM_TABLE_INDEX_SLEB64 +; YAML-NEXT: Index: 0 +; YAML-NEXT: Offset: 0x00000016 + +; YAML: - Type: DATA +; YAML: - Type: R_WASM_TABLE_INDEX_I64 +; YAML-NEXT: Index: 0 +; YAML-NEXT: Offset: 0x00000006 diff --git a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll index 24a8caffea93b..80507b52a0bf6 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll +++ b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll @@ -112,16 +112,14 @@ define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) { ; CHECK-NEXT: i32.const $push[[L11:[0-9]+]]=, 2{{$}} ; CHECK-NEXT: i32.lt_s $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}} ; CHECK-NEXT: i32.xor $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}} -; CHECK-NEXT: i32.xor $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; CHECK-NEXT: i32.const $push10=, 1{{$}} -; CHECK-NEXT: i32.ne $push8=, $pop7, $pop10{{$}} -; CHECK-NEXT: br_if 0, $pop8{{$}} -; CHECK-NEXT: i32.const $push9=, 0{{$}} -; CHECK-NEXT: return $pop9{{$}} +; CHECK-NEXT: i32.eq $push7=, $pop[[L5]], $pop[[L6]]{{$}} +; CHECK-NEXT: br_if 0, $pop7{{$}} +; CHECK-NEXT: i32.const $push8=, 0{{$}} +; CHECK-NEXT: return $pop8{{$}} ; CHECK-NEXT: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: end_block{{$}} -; CHECK-NEXT: i32.const $push14=, 1{{$}} -; CHECK-NEXT: return $pop14{{$}} +; CHECK-NEXT: i32.const $push12=, 1{{$}} +; CHECK-NEXT: return $pop12{{$}} ; NOREGS-LABEL: stack_uses: ; NOREGS: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}} ; NOREGS-NEXT: block {{$}} @@ -139,9 +137,7 @@ define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) { ; NOREGS-NEXT: i32.const 2{{$}} ; NOREGS-NEXT: i32.lt_s ; NOREGS-NEXT: i32.xor {{$}} -; NOREGS-NEXT: i32.xor {{$}} -; NOREGS-NEXT: i32.const 1{{$}} -; NOREGS-NEXT: i32.ne {{$}} +; NOREGS-NEXT: i32.eq {{$}} ; NOREGS-NEXT: br_if 0{{$}} ; NOREGS-NEXT: i32.const 0{{$}} ; NOREGS-NEXT: 
return{{$}} diff --git a/llvm/test/CodeGen/WebAssembly/simd-select.ll b/llvm/test/CodeGen/WebAssembly/simd-select.ll index c3af6f9abe60b..be36f94cf5a6d 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-select.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll @@ -1,6 +1,7 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+unimplemented-simd128 | FileCheck %s -; Test that vector selects of various varieties lower correctly to bitselects. +; Test that vector selects of various varieties lower correctly. target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" @@ -8,425 +9,562 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; 16 x i8 ; ============================================================================== -; CHECK-LABEL: vselect_v16i8: -; CHECK-NEXT: .functype vselect_v16i8 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 7{{$}} -; CHECK-NEXT: i8x16.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 7{{$}} -; CHECK-NEXT: i8x16.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vselect_v16i8: +; CHECK: .functype vselect_v16i8 (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i8x16.shl +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i8x16.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # 
fallthrough-return + %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %res +} + +define <16 x i8> @vselect_cmp_v16i8(<16 x i8> %a, <16 x i8> %b, +; CHECK-LABEL: vselect_cmp_v16i8: +; CHECK: .functype vselect_cmp_v16i8 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i8x16.lt_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + <16 x i8> %x, <16 x i8> %y) { + %c = icmp slt <16 x i8> %a, %b %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res } -; CHECK-LABEL: select_v16i8: -; CHECK-NEXT: .functype select_v16i8 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: select_v16i8: +; CHECK: .functype select_v16i8 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res } -; CHECK-LABEL: select_cmp_v16i8: -; CHECK-NEXT: .functype select_cmp_v16i8 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31 -; CHECK-NEXT: i32.shr_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i8x16.splat $push[[L2:[0-9]+]]=, $pop[[L1]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L2]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @select_cmp_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: 
select_cmp_v16i8: +; CHECK: .functype select_cmp_v16i8 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res } -; CHECK-LABEL: select_ne_v16i8: -; CHECK-NEXT: .functype select_ne_v16i8 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @select_ne_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: select_ne_v16i8: +; CHECK: .functype select_ne_v16i8 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res } -; CHECK-LABEL: select_eq_v16i8: -; CHECK-NEXT: .functype select_eq_v16i8 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <16 x i8> @select_eq_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: select_eq_v16i8: +; CHECK: .functype select_eq_v16i8 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: 
local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res } -; ============================================================================== -; 8 x i16 -; ============================================================================== -; CHECK-LABEL: vselect_v8i16: -; CHECK-NEXT: .functype vselect_v8i16 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 15{{$}} -; CHECK-NEXT: i16x8.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 15{{$}} -; CHECK-NEXT: i16x8.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vselect_v8i16: +; CHECK: .functype vselect_v8i16 (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 15 +; CHECK-NEXT: i16x8.shl +; CHECK-NEXT: i32.const 15 +; CHECK-NEXT: i16x8.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %res +} + +define <8 x i16> @vselect_cmp_v8i16(<8 x i16> %a, <8 x i16> %b, +; CHECK-LABEL: vselect_cmp_v8i16: +; CHECK: .functype vselect_cmp_v8i16 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i16x8.lt_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + <8 x i16> %x, <8 x i16> %y) { + %c = icmp slt <8 x i16> %a, %b %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res } -; CHECK-LABEL: select_v8i16: -; CHECK-NEXT: .functype select_v8i16 (i32, v128, v128) -> 
(v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: select_v8i16: +; CHECK: .functype select_v8i16 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res } -; CHECK-LABEL: select_cmp_v8i16: -; CHECK-NEXT: .functype select_cmp_v8i16 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32.shr_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i16x8.splat $push[[L2:[0-9]+]]=, $pop[[L1]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L2]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @select_cmp_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: select_cmp_v8i16: +; CHECK: .functype select_cmp_v8i16 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res } -; CHECK-LABEL: select_ne_v8i16: -; CHECK-NEXT: .functype select_ne_v8i16 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i16x8.splat 
$push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @select_ne_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: select_ne_v8i16: +; CHECK: .functype select_ne_v8i16 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res } -; CHECK-LABEL: select_eq_v8i16: -; CHECK-NEXT: .functype select_eq_v8i16 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <8 x i16> @select_eq_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: select_eq_v8i16: +; CHECK: .functype select_eq_v8i16 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res } -; ============================================================================== -; 4 x i32 -; ============================================================================== -; CHECK-LABEL: vselect_v4i32: -; CHECK-NEXT: .functype vselect_v4i32 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], 
$pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vselect_v4i32: +; CHECK: .functype vselect_v4i32 (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shl +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res } +define <4 x i32> @vselect_cmp_v4i32(<4 x i32> %a, <4 x i32> %b, +; CHECK-LABEL: vselect_cmp_v4i32: +; CHECK: .functype vselect_cmp_v4i32 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.lt_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + <4 x i32> %x, <4 x i32> %y) { + %c = icmp slt <4 x i32> %a, %b + %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %res +} -; CHECK-LABEL: select_v4i32: -; CHECK-NEXT: .functype select_v4i32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: select_v4i32: +; CHECK: .functype select_v4i32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %res = 
select i1 %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res } -; CHECK-LABEL: select_cmp_v4i32: -; CHECK-NEXT: .functype select_cmp_v4i32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32.shr_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32x4.splat $push[[L2:[0-9]+]]=, $pop[[L1]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L2]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @select_cmp_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: select_cmp_v4i32: +; CHECK: .functype select_cmp_v4i32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res } -; CHECK-LABEL: select_ne_v4i32: -; CHECK-NEXT: .functype select_ne_v4i32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @select_ne_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: select_ne_v4i32: +; CHECK: .functype select_ne_v4i32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res } -; CHECK-LABEL: select_eq_v4i32: -; CHECK-NEXT: .functype select_eq_v4i32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 
0{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x i32> @select_eq_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: select_eq_v4i32: +; CHECK: .functype select_eq_v4i32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res } -; ============================================================================== -; 2 x i64 -; ============================================================================== -; CHECK-LABEL: vselect_v2i64: -; CHECK-NEXT: .functype vselect_v2i64 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: vselect_v2i64: +; CHECK: .functype vselect_v2i64 (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 63 +; CHECK-NEXT: i64x2.shl +; CHECK-NEXT: i32.const 63 +; CHECK-NEXT: i64x2.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %res +} + +define <2 x i64> @vselect_cmp_v2i64(<2 x i64> %a, <2 x i64> %b, +; CHECK-LABEL: 
vselect_cmp_v2i64: +; CHECK: .functype vselect_cmp_v2i64 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i64.const -1 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.extract_lane 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64x2.extract_lane 0 +; CHECK-NEXT: i64.lt_s +; CHECK-NEXT: i64.select +; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const -1 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.extract_lane 1 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64x2.extract_lane 1 +; CHECK-NEXT: i64.lt_s +; CHECK-NEXT: i64.select +; CHECK-NEXT: i64x2.replace_lane 1 +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + <2 x i64> %x, <2 x i64> %y) { + %c = icmp slt <2 x i64> %a, %b %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %res } -; CHECK-LABEL: select_v2i64: -; CHECK-NEXT: .functype select_v2i64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i64.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: select_v2i64: +; CHECK: .functype select_v2i64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %res } -; CHECK-LABEL: select_cmp_v2i64: -; CHECK-NEXT: .functype select_cmp_v2i64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: 
i32.const $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.lt_s $push[[L3:[0-9]+]]=, $0, $pop[[L2]]{{$}} -; CHECK-NEXT: i64.select $push[[L4:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $pop[[L3]]{{$}} -; CHECK-NEXT: i64x2.splat $push[[L5:[0-9]+]]=, $pop[[L4]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L5]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @select_cmp_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: select_cmp_v2i64: +; CHECK: .functype select_cmp_v2i64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %res } -; CHECK-LABEL: select_ne_v2i64: -; CHECK-NEXT: .functype select_ne_v2i64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i64.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @select_ne_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: select_ne_v2i64: +; CHECK: .functype select_ne_v2i64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %res } -; CHECK-LABEL: select_eq_v2i64: -; CHECK-NEXT: .functype select_eq_v2i64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.select 
$push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x i64> @select_eq_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: select_eq_v2i64: +; CHECK: .functype select_eq_v2i64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %res } -; ============================================================================== -; 4 x float -; ============================================================================== -; CHECK-LABEL: vselect_v4f32: -; CHECK-NEXT: .functype vselect_v4f32 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: vselect_v4f32: +; CHECK: .functype vselect_v4f32 (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shl +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y + ret <4 x float> %res +} + +define <4 x float> @vselect_cmp_v4f32(<4 x float> %a, <4 x float> %b, +; CHECK-LABEL: vselect_cmp_v4f32: +; CHECK: .functype vselect_cmp_v4f32 (v128, v128, 
v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f32x4.lt +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + <4 x float> %x, <4 x float> %y) { + %c = fcmp olt <4 x float> %a, %b %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res } -; CHECK-LABEL: select_v4f32: -; CHECK-NEXT: .functype select_v4f32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: select_v4f32: +; CHECK: .functype select_v4f32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res } -; CHECK-LABEL: select_cmp_v4f32: -; CHECK-NEXT: .functype select_cmp_v4f32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32.shr_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32x4.splat $push[[L2:[0-9]+]]=, $pop[[L1]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L2]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @select_cmp_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: select_cmp_v4f32: +; CHECK: .functype select_cmp_v4f32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; 
CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res } -; CHECK-LABEL: select_ne_v4f32: -; CHECK-NEXT: .functype select_ne_v4f32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @select_ne_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: select_ne_v4f32: +; CHECK: .functype select_ne_v4f32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res } -; CHECK-LABEL: select_eq_v4f32: -; CHECK-NEXT: .functype select_eq_v4f32 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <4 x float> @select_eq_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: select_eq_v4f32: +; CHECK: .functype select_eq_v4f32 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res 
} -; ============================================================================== -; 2 x double -; ============================================================================== -; CHECK-LABEL: vselect_v2f64: -; CHECK-NEXT: .functype vselect_v2f64 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: vselect_v2f64: +; CHECK: .functype vselect_v2f64 (v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 63 +; CHECK-NEXT: i64x2.shl +; CHECK-NEXT: i32.const 63 +; CHECK-NEXT: i64x2.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y + ret <2 x double> %res +} + +define <2 x double> @vselect_cmp_v2f64(<2 x double> %a, <2 x double> %b, +; CHECK-LABEL: vselect_cmp_v2f64: +; CHECK: .functype vselect_cmp_v2f64 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.lt +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + <2 x double> %x, <2 x double> %y) { + %c = fcmp olt <2 x double> %a, %b %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res } -; CHECK-LABEL: select_v2f64: -; CHECK-NEXT: .functype select_v2f64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i64.select 
$push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: select_v2f64: +; CHECK: .functype select_v2f64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res } -; CHECK-LABEL: select_cmp_v2f64: -; CHECK-NEXT: .functype select_cmp_v2f64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i32.lt_s $push[[L3:[0-9]+]]=, $0, $pop[[L2]]{{$}} -; CHECK-NEXT: i64.select $push[[L4:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $pop[[L3]]{{$}} -; CHECK-NEXT: i64x2.splat $push[[L5:[0-9]+]]=, $pop[[L4]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L5]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @select_cmp_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: select_cmp_v2f64: +; CHECK: .functype select_cmp_v2f64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res } -; CHECK-LABEL: select_ne_v2f64: -; CHECK-NEXT: .functype select_ne_v2f64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i64.select 
$push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @select_ne_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: select_ne_v2f64: +; CHECK: .functype select_ne_v2f64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res } -; CHECK-LABEL: select_eq_v2f64: -; CHECK-NEXT: .functype select_eq_v2f64 (i32, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, -1{{$}} -; CHECK-NEXT: i64.select $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $0{{$}} -; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} define <2 x double> @select_eq_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: select_eq_v2f64: +; CHECK: .functype select_eq_v2f64 (i32, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.select +; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index c2a4da1ba5622..88a3b5aea9bd4 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -885,3 +885,37 @@ define <16 x i32> @ternlog_xor_andn(<16 x i32> %x, <16 x i32> %y, <16 x i32> %z) %c = xor <16 x i32> %b, %z ret <16 x i32> %c } + +define <16 x i32> 
@ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) { +; KNL-LABEL: ternlog_or_and_mask: +; KNL: ## %bb.0: +; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: ternlog_or_and_mask: +; SKX: ## %bb.0: +; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %a = and <16 x i32> %x, + %b = or <16 x i32> %a, %y + ret <16 x i32> %b +} + +define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) { +; KNL-LABEL: ternlog_xor_and_mask: +; KNL: ## %bb.0: +; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: ternlog_xor_and_mask: +; SKX: ## %bb.0: +; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %a = and <8 x i64> %x, + %b = xor <8 x i64> %a, %y + ret <8 x i64> %b +} diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll index 0647f4e33bf23..26d905ebeae77 100644 --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ -987,3 +987,47 @@ define <4 x i32> @ternlog_xor_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { %c = xor <4 x i32> %b, %z ret <4 x i32> %c } + +define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: ternlog_or_and_mask: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %a = and <4 x i32> %x, + %b = or <4 x i32> %a, %y + ret <4 x i32> %b +} + +define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) { +; CHECK-LABEL: ternlog_or_and_mask_ymm: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %a = and <8 x i32> %x, + %b = or <8 x i32> %a, %y + ret <8 x i32> %b +} + +define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) { +; 
CHECK-LABEL: ternlog_xor_and_mask: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %a = and <2 x i64> %x, + %b = xor <2 x i64> %a, %y + ret <2 x i64> %b +} + +define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) { +; CHECK-LABEL: ternlog_xor_and_mask_ymm: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %a = and <4 x i64> %x, + %b = xor <4 x i64> %a, %y + ret <4 x i64> %b +} diff --git a/llvm/test/CodeGen/X86/callbr-asm-sink.ll b/llvm/test/CodeGen/X86/callbr-asm-sink.ll new file mode 100644 index 0000000000000..758ac37f8ba43 --- /dev/null +++ b/llvm/test/CodeGen/X86/callbr-asm-sink.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +;; Verify that the machine instructions generated from the first +;; getelementptr don't get sunk below the callbr. (Reduced from a bug +;; report.) 
+ +%struct1 = type { i8*, i32 } + +define void @klist_dec_and_del(%struct1*) { +; CHECK-LABEL: klist_dec_and_del: +; CHECK: # %bb.0: +; CHECK-NEXT: leaq 8(%rdi), %rax +; CHECK-NEXT: #APP +; CHECK-NEXT: # 8(%rdi) .Ltmp0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: movq $0, -8(%rax) +; CHECK-NEXT: retq + %2 = getelementptr inbounds %struct1, %struct1* %0, i64 0, i32 1 + callbr void asm sideeffect "# $0 $1", "*m,X,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %2, i8* blockaddress(@klist_dec_and_del, %3)) + to label %6 [label %3] + +3: + %4 = getelementptr i32, i32* %2, i64 -2 + %5 = bitcast i32* %4 to i8** + store i8* null, i8** %5, align 8 + br label %6 + +6: + ret void +} diff --git a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll index 25e3691913c8c..4446f360ec042 100644 --- a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll +++ b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll @@ -1,9 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip ; RUN: llc < %s -mtriple=i686-pc-windows-msvc | FileCheck %s -check-prefix=X32 ; Control Flow Guard is currently only available on Windows ; Test that Control Flow Guard checks are correctly added for x86 vector calls. 
define void @func_cf_vector_x86(void (%struct.HVA)* %0, %struct.HVA* %1) #0 { +; X32-LABEL: func_cf_vector_x86: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $48, %esp +; X32-NEXT: movl 8(%ebp), %ecx +; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movups (%eax), %xmm0 +; X32-NEXT: movups 16(%eax), %xmm1 +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movaps %xmm1, 16(%esp) +; X32-NEXT: movsd (%esp), %xmm4 +; X32-NEXT: movsd 8(%esp), %xmm5 +; X32-NEXT: movsd 16(%esp), %xmm6 +; X32-NEXT: movsd 24(%esp), %xmm7 +; X32-NEXT: calll *___guard_check_icall_fptr +; X32-NEXT: movaps %xmm4, %xmm0 +; X32-NEXT: movaps %xmm5, %xmm1 +; X32-NEXT: movaps %xmm6, %xmm2 +; X32-NEXT: movaps %xmm7, %xmm3 +; X32-NEXT: calll *%ecx +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl entry: %2 = alloca %struct.HVA, align 8 %3 = bitcast %struct.HVA* %2 to i8* @@ -13,23 +39,6 @@ entry: call x86_vectorcallcc void %0(%struct.HVA inreg %5) ret void - ; X32-LABEL: func_cf_vector_x86 - ; X32: movl 12(%ebp), %eax - ; X32: movl 8(%ebp), %ecx - ; X32: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero - ; X32: movsd %xmm4, 24(%esp) - ; X32: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero - ; X32: movsd %xmm5, 16(%esp) - ; X32: movsd (%eax), %xmm6 # xmm6 = mem[0],zero - ; X32: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero - ; X32: movsd %xmm7, 8(%esp) - ; X32: movsd %xmm6, (%esp) - ; X32: calll *___guard_check_icall_fptr - ; X32: movaps %xmm6, %xmm0 - ; X32: movaps %xmm7, %xmm1 - ; X32: movaps %xmm5, %xmm2 - ; X32: movaps %xmm4, %xmm3 - ; X32: calll *%ecx } attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } diff --git a/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll b/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll new file mode 100644 index 0000000000000..62e669eff9e4e --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll @@ -0,0 +1,86 @@ +; RUN: llc -O0 
%s --basicblock-sections=all -mtriple=x86_64 -filetype=asm --frame-pointer=all -o - | FileCheck --check-prefix=SECTIONS_CFI %s +; RUN: llc -O0 %s --basicblock-sections=all -mtriple=x86_64 -filetype=asm --frame-pointer=none -o - | FileCheck --check-prefix=SECTIONS_NOFP_CFI %s +; RUN: llc -O0 %s --basicblock-sections=all -mtriple=x86_64 -filetype=obj --frame-pointer=all -o - | llvm-dwarfdump --eh-frame - | FileCheck --check-prefix=EH_FRAME %s + +;; void f1(); +;; void f3(bool b) { +;; if (b) +;; f1(); +;; } + + +; SECTIONS_CFI: _Z2f3b: +; SECTIONS_CFI: .cfi_startproc +; SECTIONS_CFI: .cfi_def_cfa_offset 16 +; SECTIONS_CFI: .cfi_offset %rbp, -16 +; SECTIONS_CFI: .cfi_def_cfa_register %rbp +; SECTIONS_CFI: .cfi_endproc + +; SECTIONS_CFI: _Z2f3b.1: +; SECTIONS_CFI-NEXT: .cfi_startproc +; SECTIONS_CFI-NEXT: .cfi_def_cfa %rbp, 16 +; SECTIONS_CFI-NEXT: .cfi_offset %rbp, -16 +; SECTIONS_CFI: .cfi_endproc + +; SECTIONS_CFI: _Z2f3b.2: +; SECTIONS_CFI-NEXT: .cfi_startproc +; SECTIONS_CFI-NEXT: .cfi_def_cfa %rbp, 16 +; SECTIONS_CFI-NEXT: .cfi_offset %rbp, -16 +; SECTIONS_CFI: .cfi_def_cfa +; SECTIONS_CFI: .cfi_endproc + + +; SECTIONS_NOFP_CFI: _Z2f3b: +; SECTIONS_NOFP_CFI: .cfi_startproc +; SECTIONS_NOFP_CFI: .cfi_def_cfa_offset 16 +; SECTIONS_NOFP_CFI: .cfi_endproc + +; SECTIONS_NOFP_CFI: _Z2f3b.1: +; SECTIONS_NOFP_CFI-NEXT: .cfi_startproc +; SECTIONS_NOFP_CFI-NEXT: .cfi_def_cfa %rsp, 16 +; SECTIONS_NOFP_CFI: .cfi_endproc + +; SECTIONS_NOFP_CFI: _Z2f3b.2: +; SECTIONS_NOFP_CFI-NEXT: .cfi_startproc +; SECTIONS_NOFP_CFI-NEXT: .cfi_def_cfa %rsp, 16 +; SECTIONS_NOFP_CFI: .cfi_endproc + + +;; There must be 1 CIE and 3 FDEs. 
+ +; EH_FRAME: CIE +; EH_FRAME: DW_CFA_def_cfa +; EH_FRAME: DW_CFA_offset + +; EH_FRAME: FDE cie= +; EH_FRAME: DW_CFA_def_cfa_offset +; EH_FRAME: DW_CFA_offset +; EH_FRAME: DW_CFA_def_cfa_register + +; EH_FRAME: FDE cie= +; EH_FRAME: DW_CFA_def_cfa +; EH_FRAME: DW_CFA_offset + +; EH_FRAME: FDE cie= +; EH_FRAME: DW_CFA_def_cfa +; EH_FRAME: DW_CFA_offset + +; Function Attrs: noinline optnone uwtable +define dso_local void @_Z2f3b(i1 zeroext %b) { +entry: + %b.addr = alloca i8, align 1 + %frombool = zext i1 %b to i8 + store i8 %frombool, i8* %b.addr, align 1 + %0 = load i8, i8* %b.addr, align 1 + %tobool = trunc i8 %0 to i1 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @_Z2f1v() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare dso_local void @_Z2f1v() diff --git a/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll b/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll new file mode 100644 index 0000000000000..19725138f6ed9 --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll @@ -0,0 +1,53 @@ +;; This test checks if CFI instructions for all callee saved registers are emitted +;; correctly with basic block sections. 
+; RUN: llc %s -mtriple=x86_64 -filetype=asm --basicblock-sections=all --frame-pointer=all -o - | FileCheck --check-prefix=SECTIONS_CFI %s + +; SECTIONS_CFI: _Z3foob: +; SECTIONS_CFI: .cfi_offset %rbp, -16 +; SECTIONS_CFI: .cfi_offset [[RA:%r.+]], -56 +; SECTIONS_CFI-NEXT: .cfi_offset [[RB:%r.+]], -48 +; SECTIONS_CFI-NEXT: .cfi_offset [[RC:%r.+]], -40 +; SECTIONS_CFI-NEXT: .cfi_offset [[RD:%r.+]], -32 +; SECTIONS_CFI-NEXT: .cfi_offset [[RE:%r.+]], -24 + +; SECTIONS_CFI: _Z3foob.1: +; SECTIONS_CFI: .cfi_offset %rbp, -16 +; SECTIONS_CFI: .cfi_offset [[RA]], -56 +; SECTIONS_CFI-NEXT: .cfi_offset [[RB]], -48 +; SECTIONS_CFI-NEXT: .cfi_offset [[RC]], -40 +; SECTIONS_CFI-NEXT: .cfi_offset [[RD]], -32 +; SECTIONS_CFI-NEXT: .cfi_offset [[RE]], -24 + +; SECTIONS_CFI: _Z3foob.2: +; SECTIONS_CFI: .cfi_offset %rbp, -16 +; SECTIONS_CFI: .cfi_offset [[RA]], -56 +; SECTIONS_CFI-NEXT: .cfi_offset [[RB]], -48 +; SECTIONS_CFI-NEXT: .cfi_offset [[RC]], -40 +; SECTIONS_CFI-NEXT: .cfi_offset [[RD]], -32 +; SECTIONS_CFI-NEXT: .cfi_offset [[RE]], -24 + + +;; void foo(bool b) { +;; if (b) // adds a basic block +;; // clobber all callee-save registers to force them to be callee-saved and to +;; // be described by cfi_offset directives. 
+;; asm("nop" ::: "r12", "r13", "r14", "r15", "rbx"); +;; } + +define dso_local void @_Z3foob(i1 zeroext %b) { +entry: + %b.addr = alloca i8, align 1 + %frombool = zext i1 %b to i8 + store i8 %frombool, i8* %b.addr, align 1 + %0 = load i8, i8* %b.addr, align 1 + %tobool = trunc i8 %0 to i1 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + call void asm sideeffect "nop", "~{r12},~{r13},~{r14},~{r15},~{rbx},~{dirflag},~{fpsr},~{flags}"() #1, !srcloc !2 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} +!2 = !{i32 38} diff --git a/llvm/test/CodeGen/X86/cmov-fp.ll b/llvm/test/CodeGen/X86/cmov-fp.ll index 756324bbdfdc9..6bbad427a9b6d 100644 --- a/llvm/test/CodeGen/X86/cmov-fp.ll +++ b/llvm/test/CodeGen/X86/cmov-fp.ll @@ -1056,11 +1056,11 @@ define float @test16(i32 %a, i32 %b, float %x) nounwind { define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test17: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: flds {{\.LCPI.*}} ; SSE-NEXT: fxch %st(1) -; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fcmovnbe %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1109,11 +1109,11 @@ define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test18: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: flds {{\.LCPI.*}} ; SSE-NEXT: fxch %st(1) -; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fcmovnb %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1162,11 +1162,11 @@ define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test19: ; SSE: # %bb.0: -; SSE-NEXT: 
movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: flds {{\.LCPI.*}} ; SSE-NEXT: fxch %st(1) -; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fcmovb %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1215,11 +1215,11 @@ define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test20: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: flds {{\.LCPI.*}} ; SSE-NEXT: fxch %st(1) -; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fcmovbe %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1268,13 +1268,13 @@ define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test21: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) -; SSE-NEXT: flds {{\.LCPI.*}} -; SSE-NEXT: fxch %st(1) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: setg %al ; SSE-NEXT: testb %al, %al +; SSE-NEXT: flds {{\.LCPI.*}} +; SSE-NEXT: fxch %st(1) ; SSE-NEXT: fcmovne %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1328,13 +1328,13 @@ define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test22: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) -; SSE-NEXT: flds {{\.LCPI.*}} -; SSE-NEXT: fxch %st(1) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: setge %al ; SSE-NEXT: testb %al, %al +; SSE-NEXT: flds {{\.LCPI.*}} +; SSE-NEXT: fxch %st(1) ; SSE-NEXT: fcmovne %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1387,13 
+1387,13 @@ define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test23: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) -; SSE-NEXT: flds {{\.LCPI.*}} -; SSE-NEXT: fxch %st(1) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: setl %al ; SSE-NEXT: testb %al, %al +; SSE-NEXT: flds {{\.LCPI.*}} +; SSE-NEXT: fxch %st(1) ; SSE-NEXT: fcmovne %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl @@ -1446,13 +1446,13 @@ define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind { define x86_fp80 @test24(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-LABEL: test24: ; SSE: # %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: fldt {{[0-9]+}}(%esp) -; SSE-NEXT: flds {{\.LCPI.*}} -; SSE-NEXT: fxch %st(1) +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: setle %al ; SSE-NEXT: testb %al, %al +; SSE-NEXT: flds {{\.LCPI.*}} +; SSE-NEXT: fxch %st(1) ; SSE-NEXT: fcmovne %st(1), %st ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/cmp-bool.ll b/llvm/test/CodeGen/X86/cmp-bool.ll new file mode 100644 index 0000000000000..7af03cd7faf4b --- /dev/null +++ b/llvm/test/CodeGen/X86/cmp-bool.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s + +define void @bool_eq(i1 zeroext %a, i1 zeroext %b, void ()* nocapture %c) nounwind { +; CHECK-LABEL: bool_eq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %esi, %edi +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %if.then +; CHECK-NEXT: jmpq *%rdx # TAILCALL +entry: + %0 = xor i1 %a, %b + br i1 %0, label %if.end, label %if.then + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} + +define void @bool_ne(i1 zeroext %a, i1 
zeroext %b, void ()* nocapture %c) nounwind { +; CHECK-LABEL: bool_ne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpb %sil, %dil +; CHECK-NEXT: je .LBB1_1 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: jmpq *%rdx # TAILCALL +; CHECK-NEXT: .LBB1_1: # %if.end +; CHECK-NEXT: retq +entry: + %cmp = xor i1 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void %c() #1 + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/X86/fixup-lea.ll b/llvm/test/CodeGen/X86/fixup-lea.ll index 3f661a8e991e9..35c3976408f64 100644 --- a/llvm/test/CodeGen/X86/fixup-lea.ll +++ b/llvm/test/CodeGen/X86/fixup-lea.ll @@ -109,31 +109,18 @@ for.end: } define void @foo_pgso(i32 inreg %dns) !prof !14 { -; SLOW-LABEL: foo_pgso: -; SLOW: # %bb.0: # %entry -; SLOW-NEXT: xorl %ecx, %ecx -; SLOW-NEXT: decl %ecx -; SLOW-NEXT: .LBB4_1: # %for.body -; SLOW-NEXT: # =>This Inner Loop Header: Depth=1 -; SLOW-NEXT: movzwl %cx, %edx -; SLOW-NEXT: decl %ecx -; SLOW-NEXT: cmpl %eax, %edx -; SLOW-NEXT: jl .LBB4_1 -; SLOW-NEXT: # %bb.2: # %for.end -; SLOW-NEXT: retl -; -; FAST-LABEL: foo_pgso: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xorl %ecx, %ecx -; FAST-NEXT: decl %ecx -; FAST-NEXT: .LBB4_1: # %for.body -; FAST-NEXT: # =>This Inner Loop Header: Depth=1 -; FAST-NEXT: movzwl %cx, %edx -; FAST-NEXT: addl $-1, %ecx -; FAST-NEXT: cmpl %eax, %edx -; FAST-NEXT: jl .LBB4_1 -; FAST-NEXT: # %bb.2: # %for.end -; FAST-NEXT: retl +; CHECK-LABEL: foo_pgso: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: decl %ecx +; CHECK-NEXT: .LBB4_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzwl %cx, %edx +; CHECK-NEXT: decl %ecx +; CHECK-NEXT: cmpl %eax, %edx +; CHECK-NEXT: jl .LBB4_1 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: retl entry: br label %for.body @@ -149,31 +136,18 @@ for.end: } define void @bar_pgso(i32 inreg %dns) !prof !14 { -; SLOW-LABEL: bar_pgso: -; SLOW: # %bb.0: # %entry -; SLOW-NEXT: xorl %ecx, 
%ecx -; SLOW-NEXT: incl %ecx -; SLOW-NEXT: .LBB5_1: # %for.body -; SLOW-NEXT: # =>This Inner Loop Header: Depth=1 -; SLOW-NEXT: movzwl %cx, %edx -; SLOW-NEXT: incl %ecx -; SLOW-NEXT: cmpl %eax, %edx -; SLOW-NEXT: jl .LBB5_1 -; SLOW-NEXT: # %bb.2: # %for.end -; SLOW-NEXT: retl -; -; FAST-LABEL: bar_pgso: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xorl %ecx, %ecx -; FAST-NEXT: incl %ecx -; FAST-NEXT: .LBB5_1: # %for.body -; FAST-NEXT: # =>This Inner Loop Header: Depth=1 -; FAST-NEXT: movzwl %cx, %edx -; FAST-NEXT: addl $1, %ecx -; FAST-NEXT: cmpl %eax, %edx -; FAST-NEXT: jl .LBB5_1 -; FAST-NEXT: # %bb.2: # %for.end -; FAST-NEXT: retl +; CHECK-LABEL: bar_pgso: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: .LBB5_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzwl %cx, %edx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl %eax, %edx +; CHECK-NEXT: jl .LBB5_1 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: retl entry: br label %for.body diff --git a/llvm/test/CodeGen/X86/fma.ll b/llvm/test/CodeGen/X86/fma.ll index a687bfd43fa63..91ba1c8891409 100644 --- a/llvm/test/CodeGen/X86/fma.ll +++ b/llvm/test/CodeGen/X86/fma.ll @@ -10,7 +10,7 @@ define float @test_f32(float %a, float %b, float %c) #0 { ; FMA32-LABEL: test_f32: -; FMA32: ## %bb.0: ## %entry +; FMA32: ## %bb.0: ; FMA32-NEXT: pushl %eax ## encoding: [0x50] ; FMA32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] ; FMA32-NEXT: ## xmm0 = mem[0],zero,zero,zero @@ -24,39 +24,92 @@ define float @test_f32(float %a, float %b, float %c) #0 { ; FMA32-NEXT: retl ## encoding: [0xc3] ; ; FMACALL32-LABEL: test_f32: -; FMACALL32: ## %bb.0: ## %entry +; FMACALL32: ## %bb.0: ; FMACALL32-NEXT: jmp _fmaf ## TAILCALL ; FMACALL32-NEXT: ## encoding: [0xeb,A] ; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 ; ; FMA64-LABEL: test_f32: -; FMA64: ## %bb.0: ## %entry +; FMA64: ## %bb.0: ; FMA64-NEXT: 
vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2] ; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; FMA64-NEXT: retq ## encoding: [0xc3] ; ; FMACALL64-LABEL: test_f32: -; FMACALL64: ## %bb.0: ## %entry +; FMACALL64: ## %bb.0: ; FMACALL64-NEXT: jmp _fmaf ## TAILCALL ; FMACALL64-NEXT: ## encoding: [0xeb,A] ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 ; ; AVX512-LABEL: test_f32: -; AVX512: ## %bb.0: ## %entry +; AVX512: ## %bb.0: ; AVX512-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] ; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_f32: -; AVX512VL: ## %bb.0: ## %entry +; AVX512VL: ## %bb.0: ; AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] ; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] -entry: %call = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %call } +define float @test_f32_reassoc(float %a, float %b, float %c) #0 { +; FMA32-LABEL: test_f32_reassoc: +; FMA32: ## %bb.0: +; FMA32-NEXT: pushl %eax ## encoding: [0x50] +; FMA32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] +; FMA32-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMA32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] +; FMA32-NEXT: ## xmm1 = mem[0],zero,zero,zero +; FMA32-NEXT: vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10] +; FMA32-NEXT: ## xmm1 = (xmm0 * xmm1) + mem +; FMA32-NEXT: vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24] +; FMA32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] +; FMA32-NEXT: popl %eax ## encoding: [0x58] +; FMA32-NEXT: retl ## encoding: [0xc3] +; +; FMACALL32-LABEL: test_f32_reassoc: +; FMACALL32: ## %bb.0: +; FMACALL32-NEXT: pushl %eax ## encoding: [0x50] +; 
FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] +; FMACALL32-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMACALL32-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0x44,0x24,0x0c] +; FMACALL32-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x44,0x24,0x10] +; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] +; FMACALL32-NEXT: popl %eax ## encoding: [0x58] +; FMACALL32-NEXT: retl ## encoding: [0xc3] +; +; FMA64-LABEL: test_f32_reassoc: +; FMA64: ## %bb.0: +; FMA64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2] +; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; FMA64-NEXT: retq ## encoding: [0xc3] +; +; FMACALL64-LABEL: test_f32_reassoc: +; FMACALL64: ## %bb.0: +; FMACALL64-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1] +; FMACALL64-NEXT: addss %xmm2, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc2] +; FMACALL64-NEXT: retq ## encoding: [0xc3] +; +; AVX512-LABEL: test_f32_reassoc: +; AVX512: ## %bb.0: +; AVX512-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] +; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; AVX512-NEXT: retq ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_f32_reassoc: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] +; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; AVX512VL-NEXT: retq ## encoding: [0xc3] + %call = call reassoc float @llvm.fma.f32(float %a, float %b, float %c) + ret float %call +} + define double @test_f64(double %a, double %b, double %c) #0 { ; FMA32-LABEL: test_f64: ; FMA32: ## %bb.0: ## %entry @@ -198,41 +251,41 @@ entry: define float @test_f32_cst() #0 { ; FMA32-LABEL: test_f32_cst: ; FMA32: ## %bb.0: ## %entry -; FMA32-NEXT: flds LCPI3_0 ## encoding: [0xd9,0x05,A,A,A,A] -; 
FMA32-NEXT: ## fixup A - offset: 2, value: LCPI3_0, kind: FK_Data_4 +; FMA32-NEXT: flds LCPI4_0 ## encoding: [0xd9,0x05,A,A,A,A] +; FMA32-NEXT: ## fixup A - offset: 2, value: LCPI4_0, kind: FK_Data_4 ; FMA32-NEXT: retl ## encoding: [0xc3] ; ; FMACALL32-LABEL: test_f32_cst: ; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: flds LCPI3_0 ## encoding: [0xd9,0x05,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 2, value: LCPI3_0, kind: FK_Data_4 +; FMACALL32-NEXT: flds LCPI4_0 ## encoding: [0xd9,0x05,A,A,A,A] +; FMACALL32-NEXT: ## fixup A - offset: 2, value: LCPI4_0, kind: FK_Data_4 ; FMACALL32-NEXT: retl ## encoding: [0xc3] ; ; FMA64-LABEL: test_f32_cst: ; FMA64: ## %bb.0: ## %entry ; FMA64-NEXT: vmovss {{.*}}(%rip), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] -; FMA64-NEXT: ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte +; FMA64-NEXT: ## fixup A - offset: 4, value: LCPI4_0-4, kind: reloc_riprel_4byte ; FMA64-NEXT: ## xmm0 = mem[0],zero,zero,zero ; FMA64-NEXT: retq ## encoding: [0xc3] ; ; FMACALL64-LABEL: test_f32_cst: ; FMACALL64: ## %bb.0: ## %entry ; FMACALL64-NEXT: movss {{.*}}(%rip), %xmm0 ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A] -; FMACALL64-NEXT: ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte +; FMACALL64-NEXT: ## fixup A - offset: 4, value: LCPI4_0-4, kind: reloc_riprel_4byte ; FMACALL64-NEXT: ## xmm0 = mem[0],zero,zero,zero ; FMACALL64-NEXT: retq ## encoding: [0xc3] ; ; AVX512-LABEL: test_f32_cst: ; AVX512: ## %bb.0: ## %entry ; AVX512-NEXT: vmovss {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] -; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte +; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI4_0-4, kind: reloc_riprel_4byte ; AVX512-NEXT: ## xmm0 = mem[0],zero,zero,zero ; AVX512-NEXT: retq ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_f32_cst: ; AVX512VL: ## %bb.0: ## %entry ; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI4_0-4, kind: reloc_riprel_4byte ; AVX512VL-NEXT: ## xmm0 = mem[0],zero,zero,zero ; AVX512VL-NEXT: retq ## encoding: [0xc3] entry: @@ -1373,19 +1426,19 @@ entry: define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { ; FMA32-LABEL: test_v2f64: -; FMA32: ## %bb.0: ## %entry +; FMA32: ## %bb.0: ; FMA32-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; FMA32-NEXT: retl ## encoding: [0xc3] ; ; FMA64-LABEL: test_v2f64: -; FMA64: ## %bb.0: ## %entry +; FMA64: ## %bb.0: ; FMA64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; FMA64-NEXT: retq ## encoding: [0xc3] ; ; FMACALL64-LABEL: test_v2f64: -; FMACALL64: ## %bb.0: ## %entry +; FMACALL64: ## %bb.0: ; FMACALL64-NEXT: subq $72, %rsp ## encoding: [0x48,0x83,0xec,0x48] ; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x20] @@ -1420,19 +1473,19 @@ define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> % ; FMACALL64-NEXT: retq ## encoding: [0xc3] ; ; AVX512-LABEL: test_v2f64: -; AVX512: ## %bb.0: ## %entry +; AVX512: ## %bb.0: ; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_v2f64: -; AVX512VL: ## %bb.0: ## %entry +; AVX512VL: ## %bb.0: ; AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] ; ; FMACALL32_BDVER2-LABEL: test_v2f64: -; FMACALL32_BDVER2: ## %bb.0: ## 
%entry +; FMACALL32_BDVER2: ## %bb.0: ; FMACALL32_BDVER2-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] ; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] @@ -1465,11 +1518,50 @@ define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> % ; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0,1] ; FMACALL32_BDVER2-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] ; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] -entry: %call = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %call } +define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { +; FMA32-LABEL: test_v2f64_reassoc: +; FMA32: ## %bb.0: +; FMA32-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] +; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; FMA32-NEXT: retl ## encoding: [0xc3] +; +; FMACALL32-LABEL: test_v2f64_reassoc: +; FMACALL32: ## %bb.0: +; FMACALL32-NEXT: vmulpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x59,0xc1] +; FMACALL32-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc2] +; FMACALL32-NEXT: retl ## encoding: [0xc3] +; +; FMA64-LABEL: test_v2f64_reassoc: +; FMA64: ## %bb.0: +; FMA64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] +; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; FMA64-NEXT: retq ## encoding: [0xc3] +; +; FMACALL64-LABEL: test_v2f64_reassoc: +; FMACALL64: ## %bb.0: +; FMACALL64-NEXT: mulpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x59,0xc1] +; FMACALL64-NEXT: addpd %xmm2, %xmm0 ## encoding: [0x66,0x0f,0x58,0xc2] +; FMACALL64-NEXT: retq ## encoding: [0xc3] +; +; AVX512-LABEL: test_v2f64_reassoc: +; AVX512: ## %bb.0: +; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] +; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; AVX512-NEXT: retq ## encoding: [0xc3] +; +; 
AVX512VL-LABEL: test_v2f64_reassoc: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] +; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 +; AVX512VL-NEXT: retq ## encoding: [0xc3] + %call = call reassoc <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %call +} + define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { ; FMA32-LABEL: test_v4f64: ; FMA32: ## %bb.0: ## %entry diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 3049365b6f328..43b1f4a79aff8 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1821,6 +1821,10 @@ define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, doubl ret double %a2 } +; Minimum FMF - the 1st fadd is contracted because that combines +; fmul+fadd as specified by the order of operations; the 2nd fadd +; requires reassociation to fuse with c*d. + define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind { ; FMA-LABEL: fadd_fma_fmul_fmf: ; FMA: # %bb.0: @@ -1846,25 +1850,28 @@ define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n ret float %a2 } -; Minimum FMF, commute final add operands, change type. +; Not minimum FMF. 
define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind { ; FMA-LABEL: fadd_fma_fmul_2: ; FMA: # %bb.0: -; FMA-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 -; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; FMA-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 +; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; FMA-NEXT: retq ; ; FMA4-LABEL: fadd_fma_fmul_2: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddss {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4 +; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 +; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0 ; FMA4-NEXT: retq ; ; AVX512-LABEL: fadd_fma_fmul_2: ; AVX512: # %bb.0: -; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 -; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 +; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; AVX512-NEXT: retq %m1 = fmul float %a, %b %m2 = fmul float %c, %d diff --git a/llvm/test/CodeGen/X86/masked-iv-unsafe.ll b/llvm/test/CodeGen/X86/masked-iv-unsafe.ll index 76f2ad22b44a2..e4c82faa90d8f 100644 --- a/llvm/test/CodeGen/X86/masked-iv-unsafe.ll +++ b/llvm/test/CodeGen/X86/masked-iv-unsafe.ll @@ -402,9 +402,9 @@ return: define void @another_count_down_signed(double* %d, i64 %n) nounwind { ; CHECK-LABEL: another_count_down_signed: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: shlq $24, %rax -; CHECK-NEXT: leaq -10(%rsi), %rcx +; CHECK-NEXT: leaq -10(%rsi), %rax +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: shlq $24, %rcx ; CHECK-NEXT: shlq $8, %rsi ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero @@ -417,17 +417,17 @@ define void @another_count_down_signed(double* %d, i64 %n) nounwind { ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; CHECK-NEXT: mulsd %xmm0, %xmm3 ; 
CHECK-NEXT: movsd %xmm3, (%rdi,%rdx,8) -; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: sarq $24, %rdx ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; CHECK-NEXT: mulsd %xmm1, %xmm3 ; CHECK-NEXT: movsd %xmm3, (%rdi,%rdx,8) ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; CHECK-NEXT: mulsd %xmm2, %xmm3 -; CHECK-NEXT: movsd %xmm3, 80(%rdi,%rcx,8) -; CHECK-NEXT: addq $-16777216, %rax # imm = 0xFF000000 +; CHECK-NEXT: movsd %xmm3, 80(%rdi,%rax,8) +; CHECK-NEXT: addq $-16777216, %rcx # imm = 0xFF000000 ; CHECK-NEXT: addq $-256, %rsi -; CHECK-NEXT: decq %rcx +; CHECK-NEXT: decq %rax ; CHECK-NEXT: jne .LBB7_1 ; CHECK-NEXT: # %bb.2: # %return ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index c68a3a5fe3246..cc6f3153d2ca1 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -1034,8 +1034,454 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ret i128 %cnt } +define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { +; X32-LABEL: cnt32_pgso: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl %ecx +; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X32-NEXT: subl %ecx, %eax +; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333 +; X32-NEXT: movl %eax, %edx +; X32-NEXT: andl %ecx, %edx +; X32-NEXT: shrl $2, %eax +; X32-NEXT: andl %ecx, %eax +; X32-NEXT: addl %edx, %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $4, %ecx +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 +; X32-NEXT: shrl $24, %eax +; X32-NEXT: retl +; +; X64-LABEL: cnt32_pgso: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X64-NEXT: subl %eax, %edi +; X64-NEXT: movl $858993459, %eax # imm = 0x33333333 +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl 
%eax, %ecx +; X64-NEXT: shrl $2, %edi +; X64-NEXT: andl %eax, %edi +; X64-NEXT: addl %ecx, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: addl %edi, %eax +; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X64-NEXT: shrl $24, %eax +; X64-NEXT: retq +; +; X32-POPCNT-LABEL: cnt32_pgso: +; X32-POPCNT: # %bb.0: +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X32-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: cnt32_pgso: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: retq + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +} + +define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 { +; X32-NOSSE-LABEL: cnt64_pgso: +; X32-NOSSE: # %bb.0: +; X32-NOSSE-NEXT: pushl %ebx +; X32-NOSSE-NEXT: pushl %edi +; X32-NOSSE-NEXT: pushl %esi +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOSSE-NEXT: movl %ecx, %edx +; X32-NOSSE-NEXT: shrl %edx +; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %esi, %edx +; X32-NOSSE-NEXT: subl %edx, %ecx +; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333 +; X32-NOSSE-NEXT: movl %ecx, %edi +; X32-NOSSE-NEXT: andl %edx, %edi +; X32-NOSSE-NEXT: shrl $2, %ecx +; X32-NOSSE-NEXT: andl %edx, %ecx +; X32-NOSSE-NEXT: addl %edi, %ecx +; X32-NOSSE-NEXT: movl %ecx, %edi +; X32-NOSSE-NEXT: shrl $4, %edi +; X32-NOSSE-NEXT: addl %ecx, %edi +; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ecx, %edi +; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %edi +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: shrl %ebx +; X32-NOSSE-NEXT: andl %esi, %ebx +; X32-NOSSE-NEXT: subl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %esi +; X32-NOSSE-NEXT: andl %edx, %esi +; X32-NOSSE-NEXT: shrl $2, %eax +; X32-NOSSE-NEXT: andl %edx, %eax +; X32-NOSSE-NEXT: addl %esi, %eax 
+; X32-NOSSE-NEXT: movl %eax, %edx +; X32-NOSSE-NEXT: shrl $4, %edx +; X32-NOSSE-NEXT: addl %eax, %edx +; X32-NOSSE-NEXT: andl %ecx, %edx +; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: addl %edi, %eax +; X32-NOSSE-NEXT: xorl %edx, %edx +; X32-NOSSE-NEXT: popl %esi +; X32-NOSSE-NEXT: popl %edi +; X32-NOSSE-NEXT: popl %ebx +; X32-NOSSE-NEXT: retl +; +; X64-LABEL: cnt64_pgso: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: subq %rcx, %rdi +; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: shrq $2, %rdi +; X64-NEXT: andq %rax, %rdi +; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $4, %rax +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 +; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: shrq $56, %rax +; X64-NEXT: retq +; +; X32-POPCNT-LABEL: cnt64_pgso: +; X32-POPCNT: # %bb.0: +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X32-POPCNT-NEXT: addl %ecx, %eax +; X32-POPCNT-NEXT: xorl %edx, %edx +; X32-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: cnt64_pgso: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntq %rdi, %rax +; X64-POPCNT-NEXT: retq +; +; X32-SSE2-LABEL: cnt64_pgso: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrlw $1, %xmm1 +; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE2-NEXT: psubb %xmm1, %xmm0 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; X32-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X32-SSE2-NEXT: pand 
%xmm1, %xmm2 +; X32-SSE2-NEXT: psrlw $2, %xmm0 +; X32-SSE2-NEXT: pand %xmm1, %xmm0 +; X32-SSE2-NEXT: paddb %xmm2, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrlw $4, %xmm1 +; X32-SSE2-NEXT: paddb %xmm0, %xmm1 +; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE2-NEXT: pxor %xmm0, %xmm0 +; X32-SSE2-NEXT: psadbw %xmm1, %xmm0 +; X32-SSE2-NEXT: movd %xmm0, %eax +; X32-SSE2-NEXT: xorl %edx, %edx +; X32-SSE2-NEXT: retl +; +; X32-SSSE3-LABEL: cnt64_pgso: +; X32-SSSE3: # %bb.0: +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2 +; X32-SSSE3-NEXT: pand %xmm0, %xmm2 +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4 +; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4 +; X32-SSSE3-NEXT: psrlw $4, %xmm1 +; X32-SSSE3-NEXT: pand %xmm0, %xmm1 +; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3 +; X32-SSSE3-NEXT: paddb %xmm4, %xmm3 +; X32-SSSE3-NEXT: pxor %xmm0, %xmm0 +; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0 +; X32-SSSE3-NEXT: movd %xmm0, %eax +; X32-SSSE3-NEXT: xorl %edx, %edx +; X32-SSSE3-NEXT: retl + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +} + +define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { +; X32-NOSSE-LABEL: cnt128_pgso: +; X32-NOSSE: # %bb.0: +; X32-NOSSE-NEXT: pushl %ebp +; X32-NOSSE-NEXT: pushl %ebx +; X32-NOSSE-NEXT: pushl %edi +; X32-NOSSE-NEXT: pushl %esi +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NOSSE-NEXT: movl %ebx, %ecx +; X32-NOSSE-NEXT: shrl %ecx +; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %edi, %ecx +; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 +; X32-NOSSE-NEXT: subl %ecx, %ebx +; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 
0x33333333 +; X32-NOSSE-NEXT: movl %ebx, %ebp +; X32-NOSSE-NEXT: andl %ecx, %ebp +; X32-NOSSE-NEXT: shrl $2, %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: addl %ebp, %ebx +; X32-NOSSE-NEXT: movl %ebx, %ebp +; X32-NOSSE-NEXT: shrl $4, %ebp +; X32-NOSSE-NEXT: addl %ebx, %ebp +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: shrl %ebx +; X32-NOSSE-NEXT: andl %edi, %ebx +; X32-NOSSE-NEXT: subl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: shrl $2, %eax +; X32-NOSSE-NEXT: andl %ecx, %eax +; X32-NOSSE-NEXT: addl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %edi +; X32-NOSSE-NEXT: shrl $4, %edi +; X32-NOSSE-NEXT: addl %eax, %edi +; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ebx, %ebp +; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: andl %ebx, %edi +; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %edi +; X32-NOSSE-NEXT: addl %eax, %edi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: shrl %eax +; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %ebp, %eax +; X32-NOSSE-NEXT: subl %eax, %esi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: andl %ecx, %eax +; X32-NOSSE-NEXT: shrl $2, %esi +; X32-NOSSE-NEXT: andl %ecx, %esi +; X32-NOSSE-NEXT: addl %eax, %esi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: shrl $4, %eax +; X32-NOSSE-NEXT: addl %esi, %eax +; X32-NOSSE-NEXT: movl %edx, %esi +; X32-NOSSE-NEXT: shrl %esi +; X32-NOSSE-NEXT: andl %ebp, %esi +; X32-NOSSE-NEXT: subl %esi, %edx +; X32-NOSSE-NEXT: movl %edx, %esi +; X32-NOSSE-NEXT: andl %ecx, %esi +; X32-NOSSE-NEXT: shrl $2, %edx +; X32-NOSSE-NEXT: andl %ecx, %edx +; X32-NOSSE-NEXT: addl %esi, %edx +; X32-NOSSE-NEXT: movl %edx, %ecx +; X32-NOSSE-NEXT: shrl $4, %ecx +; X32-NOSSE-NEXT: addl %edx, %ecx +; X32-NOSSE-NEXT: andl %ebx, %eax +; X32-NOSSE-NEXT: andl 
%ebx, %ecx +; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %ecx +; X32-NOSSE-NEXT: addl %eax, %ecx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: addl %edi, %ecx +; X32-NOSSE-NEXT: xorl %edx, %edx +; X32-NOSSE-NEXT: movl %edx, 12(%eax) +; X32-NOSSE-NEXT: movl %edx, 8(%eax) +; X32-NOSSE-NEXT: movl %edx, 4(%eax) +; X32-NOSSE-NEXT: movl %ecx, (%eax) +; X32-NOSSE-NEXT: popl %esi +; X32-NOSSE-NEXT: popl %edi +; X32-NOSSE-NEXT: popl %ebx +; X32-NOSSE-NEXT: popl %ebp +; X32-NOSSE-NEXT: retl $4 +; +; X64-LABEL: cnt128_pgso: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 +; X64-NEXT: andq %r8, %rax +; X64-NEXT: subq %rax, %rsi +; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: shrq $2, %rsi +; X64-NEXT: andq %rax, %rsi +; X64-NEXT: addq %rcx, %rsi +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: shrq $4, %rcx +; X64-NEXT: addq %rsi, %rcx +; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F +; X64-NEXT: andq %r9, %rcx +; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101 +; X64-NEXT: imulq %rdx, %rcx +; X64-NEXT: shrq $56, %rcx +; X64-NEXT: movq %rdi, %rsi +; X64-NEXT: shrq %rsi +; X64-NEXT: andq %r8, %rsi +; X64-NEXT: subq %rsi, %rdi +; X64-NEXT: movq %rdi, %rsi +; X64-NEXT: andq %rax, %rsi +; X64-NEXT: shrq $2, %rdi +; X64-NEXT: andq %rax, %rdi +; X64-NEXT: addq %rsi, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $4, %rax +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: andq %r9, %rax +; X64-NEXT: imulq %rdx, %rax +; X64-NEXT: shrq $56, %rax +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: retq +; +; X32-POPCNT-LABEL: cnt128_pgso: +; X32-POPCNT: # %bb.0: +; X32-POPCNT-NEXT: 
pushl %esi +; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X32-POPCNT-NEXT: addl %ecx, %edx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi +; X32-POPCNT-NEXT: addl %ecx, %esi +; X32-POPCNT-NEXT: addl %edx, %esi +; X32-POPCNT-NEXT: xorl %ecx, %ecx +; X32-POPCNT-NEXT: movl %ecx, 12(%eax) +; X32-POPCNT-NEXT: movl %ecx, 8(%eax) +; X32-POPCNT-NEXT: movl %ecx, 4(%eax) +; X32-POPCNT-NEXT: movl %esi, (%eax) +; X32-POPCNT-NEXT: popl %esi +; X32-POPCNT-NEXT: retl $4 +; +; X64-POPCNT-LABEL: cnt128_pgso: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntq %rsi, %rcx +; X64-POPCNT-NEXT: popcntq %rdi, %rax +; X64-POPCNT-NEXT: addq %rcx, %rax +; X64-POPCNT-NEXT: xorl %edx, %edx +; X64-POPCNT-NEXT: retq +; +; X32-SSE2-LABEL: cnt128_pgso: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrlw $1, %xmm1 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] +; X32-SSE2-NEXT: pand %xmm2, %xmm1 +; X32-SSE2-NEXT: psubb %xmm1, %xmm0 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; X32-SSE2-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE2-NEXT: pand %xmm1, %xmm3 +; X32-SSE2-NEXT: psrlw $2, %xmm0 +; X32-SSE2-NEXT: pand %xmm1, %xmm0 +; X32-SSE2-NEXT: paddb %xmm3, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE2-NEXT: psrlw $4, %xmm3 +; X32-SSE2-NEXT: paddb %xmm0, %xmm3 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X32-SSE2-NEXT: pand %xmm0, %xmm3 +; X32-SSE2-NEXT: pxor %xmm4, %xmm4 +; X32-SSE2-NEXT: psadbw %xmm4, %xmm3 +; X32-SSE2-NEXT: movd %xmm3, %ecx +; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero +; X32-SSE2-NEXT: movdqa %xmm3, %xmm5 +; X32-SSE2-NEXT: psrlw $1, %xmm5 +; 
X32-SSE2-NEXT: pand %xmm2, %xmm5 +; X32-SSE2-NEXT: psubb %xmm5, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm3, %xmm2 +; X32-SSE2-NEXT: pand %xmm1, %xmm2 +; X32-SSE2-NEXT: psrlw $2, %xmm3 +; X32-SSE2-NEXT: pand %xmm1, %xmm3 +; X32-SSE2-NEXT: paddb %xmm2, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm3, %xmm1 +; X32-SSE2-NEXT: psrlw $4, %xmm1 +; X32-SSE2-NEXT: paddb %xmm3, %xmm1 +; X32-SSE2-NEXT: pand %xmm0, %xmm1 +; X32-SSE2-NEXT: psadbw %xmm4, %xmm1 +; X32-SSE2-NEXT: movd %xmm1, %edx +; X32-SSE2-NEXT: addl %ecx, %edx +; X32-SSE2-NEXT: xorl %ecx, %ecx +; X32-SSE2-NEXT: movl %ecx, 12(%eax) +; X32-SSE2-NEXT: movl %ecx, 8(%eax) +; X32-SSE2-NEXT: movl %ecx, 4(%eax) +; X32-SSE2-NEXT: movl %edx, (%eax) +; X32-SSE2-NEXT: retl $4 +; +; X32-SSSE3-LABEL: cnt128_pgso: +; X32-SSSE3: # %bb.0: +; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2 +; X32-SSSE3-NEXT: pand %xmm0, %xmm2 +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4 +; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4 +; X32-SSSE3-NEXT: psrlw $4, %xmm1 +; X32-SSSE3-NEXT: pand %xmm0, %xmm1 +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2 +; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2 +; X32-SSSE3-NEXT: paddb %xmm4, %xmm2 +; X32-SSSE3-NEXT: pxor %xmm1, %xmm1 +; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2 +; X32-SSSE3-NEXT: movd %xmm2, %ecx +; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero +; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4 +; X32-SSSE3-NEXT: pand %xmm0, %xmm4 +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5 +; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5 +; X32-SSSE3-NEXT: psrlw $4, %xmm2 +; X32-SSSE3-NEXT: pand %xmm0, %xmm2 +; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3 +; X32-SSSE3-NEXT: paddb %xmm5, %xmm3 +; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3 +; X32-SSSE3-NEXT: movd %xmm3, %edx +; X32-SSSE3-NEXT: addl %ecx, %edx +; X32-SSSE3-NEXT: xorl %ecx, %ecx +; 
X32-SSSE3-NEXT: movl %ecx, 12(%eax) +; X32-SSSE3-NEXT: movl %ecx, 8(%eax) +; X32-SSSE3-NEXT: movl %ecx, 4(%eax) +; X32-SSSE3-NEXT: movl %edx, (%eax) +; X32-SSSE3-NEXT: retl $4 + %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) + ret i128 %cnt +} + declare i8 @llvm.ctpop.i8(i8) nounwind readnone declare i16 @llvm.ctpop.i16(i16) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i64 @llvm.ctpop.i64(i64) nounwind readnone declare i128 @llvm.ctpop.i128(i128) nounwind readnone + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/CodeGen/X86/post-ra-sched.ll b/llvm/test/CodeGen/X86/post-ra-sched.ll index f6de77a698835..70882fba50608 100644 --- a/llvm/test/CodeGen/X86/post-ra-sched.ll +++ b/llvm/test/CodeGen/X86/post-ra-sched.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s -; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s --check-prefix=PENTIUM4 +; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s --check-prefix=PENTIUM4 ; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s ; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s ; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s @@ -9,12 +10,26 @@ ; happens during the post-RA-scheduler, which should be enabled by ; default with the above specified cpus. 
+; Pentium4 is the default 32-bit CPU on Linux and currently has the postRA +; scheduler disabled. Leaving the command lines in place in case we change that. + @ptrs = external global [0 x i32*], align 4 @idxa = common global i32 0, align 4 @idxb = common global i32 0, align 4 @res = common global i32 0, align 4 define void @addindirect() { +; PENTIUM4-LABEL: addindirect: +; PENTIUM4: # %bb.0: # %entry +; PENTIUM4-NEXT: movl idxa, %eax +; PENTIUM4-NEXT: movl ptrs(,%eax,4), %eax +; PENTIUM4-NEXT: movl idxb, %ecx +; PENTIUM4-NEXT: movl ptrs(,%ecx,4), %ecx +; PENTIUM4-NEXT: movl (%ecx), %ecx +; PENTIUM4-NEXT: addl (%eax), %ecx +; PENTIUM4-NEXT: movl %ecx, res +; PENTIUM4-NEXT: retl +; ; CHECK-LABEL: addindirect: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl idxb, %ecx diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll index bb6be1d1685da..f3b319ead5982 100644 --- a/llvm/test/CodeGen/X86/pr27202.ll +++ b/llvm/test/CodeGen/X86/pr27202.ll @@ -14,6 +14,19 @@ define i1 @foo(i32 %i) optsize { ret i1 %cmp } +define i1 @foo_pgso(i32 %i) !prof !14 { +; CHECK-LABEL: foo_pgso: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $305419896, %eax # imm = 0x12345678 +; CHECK-NEXT: andl %eax, %edi +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq + %and = and i32 %i, 305419896 + %cmp = icmp eq i32 %and, 305419896 + ret i1 %cmp +} + ; 8-bit ALU immediates probably have small encodings. ; We do not want to hoist the constant into a register here. 
@@ -52,3 +65,20 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { %or4 = or i64 %or, %shl ret i64 %or4 } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/CodeGen/X86/pr34088.ll b/llvm/test/CodeGen/X86/pr34088.ll index 6950e50dd7556..a57ff09cc037b 100644 --- a/llvm/test/CodeGen/X86/pr34088.ll +++ b/llvm/test/CodeGen/X86/pr34088.ll @@ -6,7 +6,7 @@ %struct.Buffer = type { i8*, i32 } ; This test checks that the load of store %2 is not dropped. -; +; define i32 @pr34088() local_unnamed_addr { ; CHECK-LABEL: pr34088: ; CHECK: # %bb.0: # %entry @@ -18,13 +18,13 @@ define i32 @pr34088() local_unnamed_addr { ; CHECK-NEXT: andl $-16, %esp ; CHECK-NEXT: subl $32, %esp ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205] -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movaps %xmm0, (%esp) ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205] ; CHECK-NEXT: movaps %xmm1, (%esp) +; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll index 
f2135cd2e73b2..f52fec51203a8 100644 --- a/llvm/test/CodeGen/X86/pr40539.ll +++ b/llvm/test/CodeGen/X86/pr40539.ll @@ -40,7 +40,6 @@ define zeroext i1 @_Z8test_cosv() { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: divss {{\.LCPI.*}}, %xmm0 ; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) @@ -49,6 +48,7 @@ define zeroext i1 @_Z8test_cosv() { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: ucomiss %xmm0, %xmm1 ; CHECK-NEXT: setae %cl ; CHECK-NEXT: ucomiss {{\.LCPI.*}}, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr46455.ll b/llvm/test/CodeGen/X86/pr46455.ll new file mode 100644 index 0000000000000..e5ed94aa54934 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr46455.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512vl,avx512bw,avx512dq | FileCheck %s + +define void @EntryModule(i8** %buffer_table) { +; CHECK-LABEL: EntryModule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 24(%rdi), %rcx +; CHECK-NEXT: vcmpneqps (%rax), %ymm0, %ymm0 +; CHECK-NEXT: vpsrld $31, %xmm0, %xmm1 +; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3] +; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] +; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm1 +; CHECK-NEXT: vpsubd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %xmm0, (%rcx) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %i = bitcast i8** %buffer_table to <8 x float>** + %i1 = load <8 x float>*, <8 x float>** %i, align 8 + %i6 = load <8 x float>, <8 x float>* 
%i1, align 16 + %i7 = fcmp une <8 x float> %i6, zeroinitializer + %i8 = zext <8 x i1> %i7 to <8 x i32> + %i18 = getelementptr inbounds i8*, i8** %buffer_table, i64 3 + %i19 = load i8*, i8** %i18, align 8 + %shift = shufflevector <8 x i32> %i8, <8 x i32> undef, <8 x i32> + %i20 = add nuw nsw <8 x i32> %shift, %i8 + %shift13 = shufflevector <8 x i32> %i8, <8 x i32> undef, <8 x i32> + %i21 = add nuw nsw <8 x i32> %i20, %shift13 + %shift14 = shufflevector <8 x i32> %i8, <8 x i32> undef, <8 x i32> + %i22 = add nuw nsw <8 x i32> %i21, %shift14 + %i23 = extractelement <8 x i32> %i22, i32 0 + %i24 = bitcast i8* %i19 to i32* + store i32 %i23, i32* %i24, align 8 + ret void +} diff --git a/llvm/test/CodeGen/X86/reverse_branches.ll b/llvm/test/CodeGen/X86/reverse_branches.ll index 170fc6a762807..7a9ff8452d1d2 100644 --- a/llvm/test/CodeGen/X86/reverse_branches.ll +++ b/llvm/test/CodeGen/X86/reverse_branches.ll @@ -48,25 +48,25 @@ define i32 @test_branches_order() uwtable ssp { ; CHECK-NEXT: jg LBB0_7 ; CHECK-NEXT: ## %bb.2: ## %for.cond1.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $-1, %r13d -; CHECK-NEXT: movq %r15, %rbx -; CHECK-NEXT: movq %r14, %rbp +; CHECK-NEXT: movl $-1, %ebp +; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: movq %r14, %rbx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_3: ## %for.cond1 ; CHECK-NEXT: ## Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: incl %r13d -; CHECK-NEXT: cmpl $999, %r13d ## imm = 0x3E7 +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: cmpl $999, %ebp ## imm = 0x3E7 ; CHECK-NEXT: jg LBB0_6 ; CHECK-NEXT: ## %bb.4: ## %for.body3 ; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=2 -; CHECK-NEXT: addq $1002, %rbp ## imm = 0x3EA -; CHECK-NEXT: movq %rbx, %rdi -; CHECK-NEXT: addq $1001, %rbx ## imm = 0x3E9 +; CHECK-NEXT: addq $1002, %rbx ## imm = 0x3EA +; CHECK-NEXT: leaq 1001(%rdi), %r13 ; CHECK-NEXT: movl $1000, %edx ## imm = 0x3E8 ; CHECK-NEXT: movl $120, %esi ; CHECK-NEXT: 
callq _memchr -; CHECK-NEXT: cmpq %rax, %rbp +; CHECK-NEXT: cmpq %rax, %rbx +; CHECK-NEXT: movq %r13, %rdi ; CHECK-NEXT: je LBB0_3 ; CHECK-NEXT: jmp LBB0_5 ; CHECK-NEXT: LBB0_7: ## %for.end11 diff --git a/llvm/test/CodeGen/X86/rot16.ll b/llvm/test/CodeGen/X86/rot16.ll index 5a1a8da3c677b..a6adb42242dda 100644 --- a/llvm/test/CodeGen/X86/rot16.ll +++ b/llvm/test/CodeGen/X86/rot16.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X32,BASE32 +; RUN: llc < %s -mtriple=i686-- -mattr=movbe | FileCheck %s --check-prefixes=X32,MOVBE32 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,BASE64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=movbe | FileCheck %s --check-prefixes=X64,MOVBE64 define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind { ; X32-LABEL: foo: @@ -230,3 +232,103 @@ define i16 @rot16_trunc(i32 %x, i32 %y) nounwind { %t3 = trunc i32 %t2 to i16 ret i16 %t3 } + +define i16 @rotate16(i16 %x) { +; BASE32-LABEL: rotate16: +; BASE32: # %bb.0: +; BASE32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; BASE32-NEXT: rolw $8, %ax +; BASE32-NEXT: retl +; +; MOVBE32-LABEL: rotate16: +; MOVBE32: # %bb.0: +; MOVBE32-NEXT: movbew {{[0-9]+}}(%esp), %ax +; MOVBE32-NEXT: retl +; +; X64-LABEL: rotate16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $8, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8) + ret i16 %r +} + +; TODO: Should this always be rolw with memory operand? 
+ +define void @rotate16_in_place_memory(i8* %p) { +; BASE32-LABEL: rotate16_in_place_memory: +; BASE32: # %bb.0: +; BASE32-NEXT: movl {{[0-9]+}}(%esp), %eax +; BASE32-NEXT: rolw $8, (%eax) +; BASE32-NEXT: retl +; +; MOVBE32-LABEL: rotate16_in_place_memory: +; MOVBE32: # %bb.0: +; MOVBE32-NEXT: movl {{[0-9]+}}(%esp), %eax +; MOVBE32-NEXT: movzwl (%eax), %ecx +; MOVBE32-NEXT: movbew %cx, (%eax) +; MOVBE32-NEXT: retl +; +; BASE64-LABEL: rotate16_in_place_memory: +; BASE64: # %bb.0: +; BASE64-NEXT: rolw $8, (%rdi) +; BASE64-NEXT: retq +; +; MOVBE64-LABEL: rotate16_in_place_memory: +; MOVBE64: # %bb.0: +; MOVBE64-NEXT: movzwl (%rdi), %eax +; MOVBE64-NEXT: movbew %ax, (%rdi) +; MOVBE64-NEXT: retq + %p0 = getelementptr i8, i8* %p, i64 0 + %p1 = getelementptr i8, i8* %p, i64 1 + %i0 = load i8, i8* %p0, align 1 + %i1 = load i8, i8* %p1, align 1 + store i8 %i1, i8* %p0, align 1 + store i8 %i0, i8* %p1, align 1 + ret void +} + +define void @rotate16_memory(i8* %p, i8* %q) { +; BASE32-LABEL: rotate16_memory: +; BASE32: # %bb.0: +; BASE32-NEXT: movl {{[0-9]+}}(%esp), %eax +; BASE32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; BASE32-NEXT: movzwl (%ecx), %ecx +; BASE32-NEXT: rolw $8, %cx +; BASE32-NEXT: movw %cx, (%eax) +; BASE32-NEXT: retl +; +; MOVBE32-LABEL: rotate16_memory: +; MOVBE32: # %bb.0: +; MOVBE32-NEXT: movl {{[0-9]+}}(%esp), %eax +; MOVBE32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; MOVBE32-NEXT: movzwl (%ecx), %ecx +; MOVBE32-NEXT: movbew %cx, (%eax) +; MOVBE32-NEXT: retl +; +; BASE64-LABEL: rotate16_memory: +; BASE64: # %bb.0: +; BASE64-NEXT: movzwl (%rdi), %eax +; BASE64-NEXT: rolw $8, %ax +; BASE64-NEXT: movw %ax, (%rsi) +; BASE64-NEXT: retq +; +; MOVBE64-LABEL: rotate16_memory: +; MOVBE64: # %bb.0: +; MOVBE64-NEXT: movzwl (%rdi), %eax +; MOVBE64-NEXT: movbew %ax, (%rsi) +; MOVBE64-NEXT: retq + %p0 = getelementptr i8, i8* %p, i64 0 + %p1 = getelementptr i8, i8* %p, i64 1 + %q0 = getelementptr i8, i8* %q, i64 0 + %q1 = getelementptr i8, i8* %q, i64 1 + %i0 = load i8, i8* %p0, 
align 1 + %i1 = load i8, i8* %p1, align 1 + store i8 %i1, i8* %q0, align 1 + store i8 %i0, i8* %q1, align 1 + ret void +} + +declare i16 @llvm.fshl.i16(i16, i16, i16) diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll index 9ef29c7883d4d..41003c9d335d0 100644 --- a/llvm/test/CodeGen/X86/rotate-extract.ll +++ b/llvm/test/CodeGen/X86/rotate-extract.ll @@ -306,9 +306,9 @@ define i32 @extract_add_1_comut(i32 %i) nounwind { define i32 @no_extract_add_1(i32 %i) nounwind { ; X86-LABEL: no_extract_add_1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx), %eax -; X86-NEXT: shrl $27, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax), %ecx +; X86-NEXT: shrl $27, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll index f2c7c2fa4a564..295fdfb5a2617 100644 --- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -3,8 +3,6 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW @@ -14,6 +12,10 @@ ; Intel chips with fast unaligned memory accesses +; Marked fast because this is the default 
32-bit mode CPU in clang. +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=FAST + ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.mir b/llvm/test/CodeGen/X86/statepoint-vreg.mir new file mode 100644 index 0000000000000..311a71205f2aa --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-vreg.mir @@ -0,0 +1,156 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +# RUN: llc -o - %s -start-after=finalize-isel | FileCheck %s + +--- | + ; ModuleID = 'test/CodeGen/X86/statepoint-vreg.ll' + source_filename = "test/CodeGen/X86/statepoint-vreg.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-pc-linux-gnu" + + declare void @bar() + + define i32 @test_basic(i32 addrspace(1)* %obj1, i32 addrspace(1)* %obj2) gc "statepoint-example" { + ; CHECK-LABEL: test_basic: + ; CHECK: # %bb.0: + ; CHECK-NEXT: pushq %r14 + ; CHECK-NEXT: .cfi_def_cfa_offset 16 + ; CHECK-NEXT: pushq %rbx + ; CHECK-NEXT: .cfi_def_cfa_offset 24 + ; CHECK-NEXT: pushq %rax + ; CHECK-NEXT: .cfi_def_cfa_offset 32 + ; CHECK-NEXT: .cfi_offset %rbx, -24 + ; CHECK-NEXT: .cfi_offset %r14, -16 + ; CHECK-NEXT: movq %rsi, %r14 + ; CHECK-NEXT: movq %rdi, %rbx + ; CHECK-NEXT: callq bar + ; CHECK-NEXT: .Ltmp0: + ; CHECK-NEXT: movl (%rbx), %eax + ; CHECK-NEXT: addl (%r14), %eax + ; CHECK-NEXT: addq $8, %rsp + ; CHECK-NEXT: .cfi_def_cfa_offset 24 + ; CHECK-NEXT: popq %rbx + ; CHECK-NEXT: .cfi_def_cfa_offset 16 + ; CHECK-NEXT: popq %r14 + ; CHECK-NEXT: .cfi_def_cfa_offset 8 + 
; CHECK-NEXT: retq + %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* %obj1, i32 addrspace(1)* %obj2) ] + %rel1 = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 0, i32 0) ; (%obj1, %obj1) + %rel2 = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 1, i32 1) ; (%obj2, %obj2) + %a = load i32, i32 addrspace(1)* %rel1, align 4 + %b = load i32, i32 addrspace(1)* %rel2, align 4 + %c = add i32 %a, %b + ret i32 %c + } + + ; CHECK-LABEL: __LLVM_StackMaps: + ; CHECK-NEXT: .byte 3 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 1 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .long 1 + ; CHECK-NEXT: .quad test_basic + ; CHECK-NEXT: .quad 24 + ; CHECK-NEXT: .quad 1 + ; CHECK-NEXT: .quad 2882400000 + ; CHECK-NEXT: .long .Ltmp0-test_basic + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .byte 4 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .byte 4 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .byte 4 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 1 + ; CHECK-NEXT: .byte 4 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .byte 1 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 14 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .byte 1 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 14 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .byte 1 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: 
.short 3 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .byte 1 + ; CHECK-NEXT: .byte 0 + ; CHECK-NEXT: .short 8 + ; CHECK-NEXT: .short 3 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .long 0 + ; CHECK-NEXT: .p2align 3 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .short 0 + ; CHECK-NEXT: .p2align 3 + + declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 immarg, i32 immarg, void ()*, i32 immarg, i32 immarg, ...) + + ; Function Attrs: nounwind readonly + declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32 immarg, i32 immarg) #0 + + attributes #0 = { nounwind readonly } + attributes #1 = { nounwind } + +... +--- +name: test_basic +alignment: 16 +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr64, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %2:gr64, %3:gr64 = STATEPOINT 2882400000, 0, 0, @bar, 2, 0, 2, 0, 2, 1, 2, 0, %1, %1(tied-def 0), %0, %0(tied-def 1), csr_64, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $noreg :: (load 4 from %ir.rel1, addrspace 1) + %5:gr32 = ADD32rm %4, killed %2, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.rel2, addrspace 1) + $eax = COPY %5 + 
RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index 768684067f32f..60fd01eac0960 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -246,10 +246,7 @@ define void @pr43446_1(i8* %a) { define void @rotate16_in_place(i8* %p) { ; CHECK-LABEL: rotate16_in_place: ; CHECK: # %bb.0: -; CHECK-NEXT: movb (%rdi), %al -; CHECK-NEXT: movb 1(%rdi), %cl -; CHECK-NEXT: movb %cl, (%rdi) -; CHECK-NEXT: movb %al, 1(%rdi) +; CHECK-NEXT: rolw $8, (%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i8, i8* %p, i64 0 %p1 = getelementptr i8, i8* %p, i64 1 @@ -263,10 +260,9 @@ define void @rotate16_in_place(i8* %p) { define void @rotate16(i8* %p, i8* %q) { ; CHECK-LABEL: rotate16: ; CHECK: # %bb.0: -; CHECK-NEXT: movb (%rdi), %al -; CHECK-NEXT: movb 1(%rdi), %cl -; CHECK-NEXT: movb %cl, (%rsi) -; CHECK-NEXT: movb %al, 1(%rsi) +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: rolw $8, %ax +; CHECK-NEXT: movw %ax, (%rsi) ; CHECK-NEXT: retq %p0 = getelementptr i8, i8* %p, i64 0 %p1 = getelementptr i8, i8* %p, i64 1 @@ -282,10 +278,7 @@ define void @rotate16(i8* %p, i8* %q) { define void @rotate32_in_place(i16* %p) { ; CHECK-LABEL: rotate32_in_place: ; CHECK: # %bb.0: -; CHECK-NEXT: movzwl (%rdi), %eax -; CHECK-NEXT: movzwl 2(%rdi), %ecx -; CHECK-NEXT: movw %cx, (%rdi) -; CHECK-NEXT: movw %ax, 2(%rdi) +; CHECK-NEXT: roll $16, (%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i16, i16* %p, i64 0 %p1 = getelementptr i16, i16* %p, i64 1 @@ -299,10 +292,9 @@ define void @rotate32_in_place(i16* %p) { define void @rotate32(i16* %p) { ; CHECK-LABEL: rotate32: ; CHECK: # %bb.0: -; CHECK-NEXT: movzwl (%rdi), %eax -; CHECK-NEXT: movzwl 2(%rdi), %ecx -; CHECK-NEXT: movw %cx, 84(%rdi) -; CHECK-NEXT: movw %ax, 86(%rdi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: roll $16, %eax +; CHECK-NEXT: movl %eax, 84(%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i16, i16* %p, i64 0 %p1 = getelementptr i16, 
i16* %p, i64 1 @@ -318,10 +310,7 @@ define void @rotate32(i16* %p) { define void @rotate64_in_place(i32* %p) { ; CHECK-LABEL: rotate64_in_place: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: movl 4(%rdi), %ecx -; CHECK-NEXT: movl %ecx, (%rdi) -; CHECK-NEXT: movl %eax, 4(%rdi) +; CHECK-NEXT: rolq $32, (%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i32, i32* %p, i64 0 %p1 = getelementptr i32, i32* %p, i64 1 @@ -335,10 +324,9 @@ define void @rotate64_in_place(i32* %p) { define void @rotate64(i32* %p) { ; CHECK-LABEL: rotate64: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: movl 4(%rdi), %ecx -; CHECK-NEXT: movl %ecx, 8(%rdi) -; CHECK-NEXT: movl %eax, 12(%rdi) +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: rolq $32, %rax +; CHECK-NEXT: movq %rax, 8(%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i32, i32* %p, i64 0 %p1 = getelementptr i32, i32* %p, i64 1 @@ -354,10 +342,9 @@ define void @rotate64(i32* %p) { define void @rotate64_iterate(i16* %p) { ; CHECK-LABEL: rotate64_iterate: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: movl 4(%rdi), %ecx -; CHECK-NEXT: movl %ecx, 84(%rdi) -; CHECK-NEXT: movl %eax, 88(%rdi) +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: rolq $32, %rax +; CHECK-NEXT: movq %rax, 84(%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i16, i16* %p, i64 0 %p1 = getelementptr i16, i16* %p, i64 1 @@ -378,6 +365,8 @@ define void @rotate64_iterate(i16* %p) { ret void } +; TODO: recognize this as 2 rotates? + define void @rotate32_consecutive(i16* %p) { ; CHECK-LABEL: rotate32_consecutive: ; CHECK: # %bb.0: @@ -409,17 +398,17 @@ define void @rotate32_consecutive(i16* %p) { ret void } +; Same as above, but now the stores are not all consecutive. 
+ define void @rotate32_twice(i16* %p) { ; CHECK-LABEL: rotate32_twice: ; CHECK: # %bb.0: -; CHECK-NEXT: movzwl (%rdi), %eax -; CHECK-NEXT: movzwl 2(%rdi), %ecx -; CHECK-NEXT: movzwl 4(%rdi), %edx -; CHECK-NEXT: movzwl 6(%rdi), %esi -; CHECK-NEXT: movw %cx, 84(%rdi) -; CHECK-NEXT: movw %ax, 86(%rdi) -; CHECK-NEXT: movw %si, 108(%rdi) -; CHECK-NEXT: movw %dx, 110(%rdi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: movl 4(%rdi), %ecx +; CHECK-NEXT: roll $16, %eax +; CHECK-NEXT: roll $16, %ecx +; CHECK-NEXT: movl %eax, 84(%rdi) +; CHECK-NEXT: movl %ecx, 108(%rdi) ; CHECK-NEXT: retq %p0 = getelementptr i16, i16* %p, i64 0 %p1 = getelementptr i16, i16* %p, i64 1 diff --git a/llvm/test/CodeGen/X86/testb-je-fusion.ll b/llvm/test/CodeGen/X86/testb-je-fusion.ll index e631d8993dc82..90e011e08d1fd 100644 --- a/llvm/test/CodeGen/X86/testb-je-fusion.ll +++ b/llvm/test/CodeGen/X86/testb-je-fusion.ll @@ -238,8 +238,8 @@ define i32 @macrofuse_alu_je(i32 %flags, i8* %p) nounwind { ; NOFUSION_MISCHEDPOSTRA-LABEL: macrofuse_alu_je: ; NOFUSION_MISCHEDPOSTRA: # %bb.0: # %entry ; NOFUSION_MISCHEDPOSTRA-NEXT: movl %edi, %eax -; NOFUSION_MISCHEDPOSTRA-NEXT: addl $-512, %eax # imm = 0xFE00 ; NOFUSION_MISCHEDPOSTRA-NEXT: movb $1, (%rsi) +; NOFUSION_MISCHEDPOSTRA-NEXT: addl $-512, %eax # imm = 0xFE00 ; NOFUSION_MISCHEDPOSTRA-NEXT: je .LBB2_2 ; NOFUSION_MISCHEDPOSTRA-NEXT: # %bb.1: # %if.then ; NOFUSION_MISCHEDPOSTRA-NEXT: movl $1, %eax @@ -249,8 +249,8 @@ define i32 @macrofuse_alu_je(i32 %flags, i8* %p) nounwind { ; BRANCHFUSIONONLY_MISCHEDPOSTRA-LABEL: macrofuse_alu_je: ; BRANCHFUSIONONLY_MISCHEDPOSTRA: # %bb.0: # %entry ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: movl %edi, %eax -; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: addl $-512, %eax # imm = 0xFE00 ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: movb $1, (%rsi) +; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: addl $-512, %eax # imm = 0xFE00 ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: je .LBB2_2 ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: # %bb.1: # %if.then ; 
BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: movl $1, %eax @@ -340,8 +340,8 @@ define i32 @macrofuse_dec_je(i32 %flags, i8* %p) nounwind { ; NOFUSION_MISCHEDPOSTRA-LABEL: macrofuse_dec_je: ; NOFUSION_MISCHEDPOSTRA: # %bb.0: # %entry ; NOFUSION_MISCHEDPOSTRA-NEXT: movl %edi, %eax -; NOFUSION_MISCHEDPOSTRA-NEXT: decl %eax ; NOFUSION_MISCHEDPOSTRA-NEXT: movb $1, (%rsi) +; NOFUSION_MISCHEDPOSTRA-NEXT: decl %eax ; NOFUSION_MISCHEDPOSTRA-NEXT: je .LBB3_2 ; NOFUSION_MISCHEDPOSTRA-NEXT: # %bb.1: # %if.then ; NOFUSION_MISCHEDPOSTRA-NEXT: movl $1, %eax @@ -351,8 +351,8 @@ define i32 @macrofuse_dec_je(i32 %flags, i8* %p) nounwind { ; BRANCHFUSIONONLY_MISCHEDPOSTRA-LABEL: macrofuse_dec_je: ; BRANCHFUSIONONLY_MISCHEDPOSTRA: # %bb.0: # %entry ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: movl %edi, %eax -; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: decl %eax ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: movb $1, (%rsi) +; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: decl %eax ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: je .LBB3_2 ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: # %bb.1: # %if.then ; BRANCHFUSIONONLY_MISCHEDPOSTRA-NEXT: movl $1, %eax diff --git a/llvm/test/CodeGen/X86/topdepthreduce-postra.mir b/llvm/test/CodeGen/X86/topdepthreduce-postra.mir new file mode 100644 index 0000000000000..7ca826d582b5c --- /dev/null +++ b/llvm/test/CodeGen/X86/topdepthreduce-postra.mir @@ -0,0 +1,16 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64 -enable-post-misched -run-pass=postmisched -o - %s | FileCheck %s +--- +# Check that postmisched's TopDepthReduce heuristic moves the DEC32r later +# because of the dependency on eax +name: test +body: | + bb.0: + ; CHECK-LABEL: name: test + ; CHECK: $eax = MOV32rr killed $edi + ; CHECK: MOV8mi killed renamable $rsi, 1, $noreg, 0, $noreg, 1 :: (store 1) + ; CHECK: renamable $eax = DEC32r killed renamable $eax, implicit-def $eflags + $eax = MOV32rr $edi + renamable $eax = DEC32r killed renamable $eax, implicit-def $eflags + MOV8mi 
killed renamable $rsi, 1, $noreg, 0, $noreg, 1 :: (store 1) +... diff --git a/llvm/test/CodeGen/X86/twoaddr-lea.ll b/llvm/test/CodeGen/X86/twoaddr-lea.ll index 077cf805bcb15..716d20d63c443 100644 --- a/llvm/test/CodeGen/X86/twoaddr-lea.ll +++ b/llvm/test/CodeGen/X86/twoaddr-lea.ll @@ -68,8 +68,9 @@ bb2: br label %bb6 bb3: -; CHECK: subl %e[[REG0:[a-z0-9]+]], -; CHECK: addq $4, %r[[REG0]] +; CHECK: LBB3_3: +; CHECK: addq $4, %r +; CHECK: subl %e %tmp14 = phi i64 [ %tmp15, %bb5 ], [ 0, %bb1 ] %tmp15 = add nuw i64 %tmp14, 4 %tmp16 = trunc i64 %tmp14 to i32 diff --git a/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll b/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll deleted file mode 100644 index a06eaec894caa..0000000000000 --- a/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll +++ /dev/null @@ -1,30 +0,0 @@ -; REQUIRES: asserts -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk" - -define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { -entry: - %tmp25 = bitcast <2 x i64> %a1 to <8 x i16> ; <<8 x i16>> [#uses=1] - br label %bb -bb: ; preds = %bb, %entry - %skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=3] - %vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3 ; [#uses=3] - %vDct_addr.0 = getelementptr <2 x i64>, <2 x i64>* %vDct, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1] - %vYp_addr.0 = getelementptr <2 x i64>, <2 x i64>* %vYp, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1] - %skiplist_addr.0 = getelementptr i8, i8* %skiplist, i32 %skiplist_addr.0.rec ; [#uses=1] - %vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1 ; [#uses=1] - %tmp7 = getelementptr <2 x i64>, <2 x i64>* %vDct, i32 %vDct_addr.0.sum43 ; <<2 x i64>*> [#uses=1] - %tmp8 = load <2 x i64>, <2 x i64>* %tmp7, align 16 ; <<2 x i64>> [#uses=1] - %tmp11 = load <2 x i64>, <2 x i64>* %vDct_addr.0, align 16 ; <<2 x i64>> [#uses=1] - %tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp15 = 
bitcast <2 x i64> %tmp11 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] - %tmp26 = mul <8 x i16> %tmp25, %tmp16 ; <<8 x i16>> [#uses=1] - %tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64> ; <<2 x i64>> [#uses=1] - store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16 - %tmp37 = load i8, i8* %skiplist_addr.0, align 1 ; [#uses=1] - %tmp38 = icmp eq i8 %tmp37, 0 ; [#uses=1] - %indvar.next = add i32 %skiplist_addr.0.rec, 1 ; [#uses=1] - br i1 %tmp38, label %return, label %bb -return: ; preds = %bb - ret void -} diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index a5f6be558e8cf..1d596f5db3aeb 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -456,7 +456,7 @@ define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) { ; ; SSSE3-LABEL: trunc8i32_8i16_lshr: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,10,11,14,15,14,15,255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,10,11,14,15,14,15,128,128] ; SSSE3-NEXT: pshufb %xmm2, %xmm1 ; SSSE3-NEXT: pshufb %xmm2, %xmm0 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] diff --git a/llvm/test/DebugInfo/AArch64/eh-frame.ll b/llvm/test/DebugInfo/AArch64/eh-frame.ll new file mode 100644 index 0000000000000..1becd769d52fc --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/eh-frame.ll @@ -0,0 +1,20 @@ +; RUN: llc -filetype=obj -mtriple=aarch64 %s -o %t.o +; RUN: llvm-readobj -r %t.o | FileCheck %s --check-prefix=REL32 +; RUN: llvm-dwarfdump --eh-frame %t.o 2>&1 | FileCheck %s + +; REL32: R_AARCH64_PREL32 .text 0x0 +; REL32-NEXT: R_AARCH64_PREL32 .text 0x4 + +; CHECK-NOT: warning: +; CHECK: FDE cie=00000000 pc=00000000...00000004 +; CHECK: FDE cie=00000000 pc=00000004...00000008 + +define void @foo() { +entry: + ret void +} + +define void @bar() { 
+entry: + ret void +} diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll index d42dcf0dd1742..23a1f56fdf409 100644 --- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll +++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll @@ -40,7 +40,7 @@ ; OBJ: SubSectionType: FrameData (0xF5) ; OBJ: FrameData { ; OBJ: RvaStart: 0x0 -; OBJ: CodeSize: 0x34 +; OBJ: CodeSize: 0x36 ; OBJ: PrologSize: 0x9 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -50,7 +50,7 @@ ; OBJ: } ; OBJ: FrameData { ; OBJ: RvaStart: 0x7 -; OBJ: CodeSize: 0x2D +; OBJ: CodeSize: 0x2F ; OBJ: PrologSize: 0x2 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -61,7 +61,7 @@ ; OBJ: } ; OBJ: FrameData { ; OBJ: RvaStart: 0x8 -; OBJ: CodeSize: 0x2C +; OBJ: CodeSize: 0x2E ; OBJ: PrologSize: 0x1 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -73,7 +73,7 @@ ; OBJ: } ; OBJ: FrameData { ; OBJ: RvaStart: 0x9 -; OBJ: CodeSize: 0x2B +; OBJ: CodeSize: 0x2D ; OBJ: PrologSize: 0x0 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = diff --git a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll index 26fe7c49e7acf..c604234a60554 100644 --- a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll +++ b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll @@ -15,9 +15,9 @@ ; CHECK: subl $20, %esp ; CHECK: .cv_fpo_stackalloc 20 ; CHECK: .cv_fpo_endprologue +; CHECK: movl 28(%esp), %esi ; CHECK: ___security_cookie -; CHECK: movl 28(%esp), %esi ; CHECK: movl %esi, {{[0-9]*}}(%esp) ; CHECK: movl %esi, {{[0-9]*}}(%esp) ; CHECK: movl %esi, {{[0-9]*}}(%esp) @@ -30,7 +30,7 @@ ; CHECK: addl $20, %esp ; CHECK: popl %esi ; CHECK: retl -; CHECK: Ltmp3: +; CHECK: Ltmp2: ; CHECK: .cv_fpo_endproc ; ModuleID = 't.c' diff --git a/llvm/test/DebugInfo/COFF/types-array.ll b/llvm/test/DebugInfo/COFF/types-array.ll index 2962f970aca14..19ddcf9ffe2c9 100644 --- a/llvm/test/DebugInfo/COFF/types-array.ll +++ b/llvm/test/DebugInfo/COFF/types-array.ll @@ -51,7 +51,7 @@ ; CHECK: 
PtrParent: 0x0 ; CHECK: PtrEnd: 0x0 ; CHECK: PtrNext: 0x0 -; CHECK: CodeSize: 0x39 +; CHECK: CodeSize: 0x2A ; CHECK: DbgStart: 0x0 ; CHECK: DbgEnd: 0x0 ; CHECK: FunctionType: f (0x1002) @@ -73,7 +73,7 @@ ; CHECK: LocalVariableAddrRange { ; CHECK: OffsetStart: .text+0x6 ; CHECK: ISectStart: 0x0 -; CHECK: Range: 0x33 +; CHECK: Range: 0x24 ; CHECK: } ; CHECK: } ; CHECK: ProcEnd { diff --git a/llvm/test/DebugInfo/PowerPC/eh-frame.ll b/llvm/test/DebugInfo/PowerPC/eh-frame.ll new file mode 100644 index 0000000000000..36b1c272f94cc --- /dev/null +++ b/llvm/test/DebugInfo/PowerPC/eh-frame.ll @@ -0,0 +1,38 @@ +; RUN: llc -filetype=obj -mtriple=powerpc %s -o %t32.o +; RUN: llvm-readobj -r %t32.o | FileCheck %s --check-prefix=PPC_REL +; RUN: llvm-dwarfdump --eh-frame %t32.o 2>&1 | FileCheck %s --check-prefix=PPC + +; PPC_REL: R_PPC_REL32 .text 0x0 +; PPC_REL-NEXT: R_PPC_REL32 .text 0x4 + +; PPC-NOT: warning: +; PPC: FDE cie=00000000 pc=00000000...00000004 +; PPC: FDE cie=00000000 pc=00000004...00000008 + +; RUN: llc -filetype=obj -mtriple=ppc64 %s -o %t64.o +; RUN: llvm-readobj -r %t64.o | FileCheck %s --check-prefix=PPC64_REL +; RUN: llvm-dwarfdump --eh-frame %t64.o 2>&1 | FileCheck %s --check-prefix=PPC64 + +; PPC64_REL: R_PPC64_REL32 .text 0x0 +; PPC64_REL-NEXT: R_PPC64_REL32 .text 0x10 + +; PPC64-NOT: warning: +; PPC64: FDE cie=00000000 pc=00000000...00000010 +; PPC64: FDE cie=00000000 pc=00000010...00000020 + +; RUN: llc -filetype=obj -mtriple=ppc64le -code-model=large %s -o %t64l.o +; RUN: llvm-readobj -r %t64l.o | FileCheck %s --check-prefix=PPC64L_REL +; RUN: llvm-dwarfdump --eh-frame %t64l.o 2>&1 | FileCheck %s --check-prefix=PPC64 + +; PPC64L_REL: R_PPC64_REL64 .text 0x0 +; PPC64L_REL-NEXT: R_PPC64_REL64 .text 0x10 + +define void @foo() { +entry: + ret void +} + +define void @bar() { +entry: + ret void +} diff --git a/llvm/test/DebugInfo/X86/dbgloc-insert-extract-val-instrs.ll b/llvm/test/DebugInfo/X86/dbgloc-insert-extract-val-instrs.ll new file mode 100644 index 
0000000000000..cbbc830e61ac9 --- /dev/null +++ b/llvm/test/DebugInfo/X86/dbgloc-insert-extract-val-instrs.ll @@ -0,0 +1,57 @@ +;; Check that every instruction inserted by -deadargelim has a debug location. +;; The test was generated by using -debugify option. + +; RUN: opt < %s -deadargelim -S 2>&1 | FileCheck %s + +; CHECK-LABEL: fn +; CHECK: %oldret = extractvalue { i32, i32, i16 } %z, 0, !dbg ![[LOC:.*]] +; CHECK: %newret = insertvalue { i32, i32 } undef, i32 %oldret, 0, !dbg ![[LOC:.*]] +; CHECK: %oldret1 = extractvalue { i32, i32, i16 } %z, 1, !dbg ![[LOC:.*]] +; CHECK: %newret2 = insertvalue { i32, i32 } %newret, i32 %oldret1, 1, !dbg ![[LOC:.*]] + +; CHECK-LABEL: fn1 +; CHECK: %newret = extractvalue { i32, i32 } %ret, 0, !dbg ![[LOC2:.*]] +; CHECK: %oldret = insertvalue { i32, i32, i16 } undef, i32 %newret, 0, !dbg ![[LOC2:.*]] +; CHECK: %newret1 = extractvalue { i32, i32 } %ret, 1, !dbg ![[LOC2:.*]] +; CHECK: %oldret2 = insertvalue { i32, i32, i16 } %oldret, i32 %newret1, 1, !dbg ![[LOC2:.*]] + +; ModuleID = 'test.ll' +source_filename = "test.ll" + +define internal { i32, i32, i16 } @fn() !dbg !6 { + %x = insertvalue { i32, i32, i16 } undef, i32 1, 0, !dbg !8 + %y = insertvalue { i32, i32, i16 } %x, i32 2, 1, !dbg !9 + %z = insertvalue { i32, i32, i16 } %y, i16 3, 2, !dbg !10 + ret { i32, i32, i16 } %z, !dbg !11 +} + +define i32 @fn1() !dbg !12 { + %ret = call { i32, i32, i16 } @fn(), !dbg !13 + %b = extractvalue { i32, i32, i16 } %ret, 0, !dbg !14 + %c = extractvalue { i32, i32, i16 } %ret, 1, !dbg !15 + %d = add i32 %b, %c, !dbg !16 + ret i32 %d, !dbg !17 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.ll", directory: "/") +!2 = !{} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(name: "fn", linkageName: "fn", scope: null, file: !1, 
line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 1, column: 1, scope: !6) +!9 = !DILocation(line: 2, column: 1, scope: !6) +!10 = !DILocation(line: 3, column: 1, scope: !6) +!11 = !DILocation(line: 4, column: 1, scope: !6) +!12 = distinct !DISubprogram(name: "fn1", linkageName: "fn1", scope: null, file: !1, line: 5, type: !7, scopeLine: 5, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 5, column: 1, scope: !12) +!14 = !DILocation(line: 6, column: 1, scope: !12) +!15 = !DILocation(line: 7, column: 1, scope: !12) +!16 = !DILocation(line: 8, column: 1, scope: !12) +!17 = !DILocation(line: 9, column: 1, scope: !12) + +; CHECK: ![[LOC]] = !DILocation(line: 4 +; CHECK: ![[LOC2]] = !DILocation(line: 5 diff --git a/llvm/test/DebugInfo/X86/dwarfdump-rnglists-zero-length.s b/llvm/test/DebugInfo/X86/dwarfdump-rnglists-zero-length.s new file mode 100644 index 0000000000000..05f87a1ef513c --- /dev/null +++ b/llvm/test/DebugInfo/X86/dwarfdump-rnglists-zero-length.s @@ -0,0 +1,12 @@ +## The test checks that llvm-dwarfdump can handle a malformed input file without +## crashing. + +# RUN: llvm-mc -triple x86_64 %s -filetype=obj -o %t +# RUN: not llvm-dwarfdump -debug-rnglists %t 2>&1 | FileCheck %s + +# CHECK: error: .debug_rnglists table at offset 0x0 has too small length (0x4) to contain a complete header + +## An assertion used to trigger in the debug build of the DebugInfo/DWARF +## library if the unit length field in a range list table was 0. 
+ .section .debug_rnglists,"",@progbits + .long 0 diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s index 20b26b1826ba9..0eee9a449cea9 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/ELF_x86-64_relocations.s @@ -28,6 +28,7 @@ test_pcrel32: .type named_data,@object .data + .globl named_data .p2align 2 named_data: .long 42 diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s index 6d9c26484f8fd..dd3cc455bd349 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s @@ -193,19 +193,19 @@ anon_func_addr_quad: # X86_64_RELOC_SUBTRACTOR Quad/Long in named storage with anonymous minuend # -# jitlink-check: *{8}anon_minuend_quad1 = section_addr(macho_reloc.o, __data) - anon_minuend_quad1 + 2 +# jitlink-check: *{8}anon_minuend_quad1 = section_addr(macho_reloc.o, __data) - anon_minuend_quad1 - 2 # Only the form "B: .quad LA - B + C" is tested. The form "B: .quad B - LA + C" is # invalid because the subtrahend can not be local. .globl anon_minuend_quad1 .p2align 3 anon_minuend_quad1: - .quad Lanon_data - anon_minuend_quad1 + 2 + .quad Lanon_data - anon_minuend_quad1 - 2 -# jitlink-check: *{4}anon_minuend_long1 = (section_addr(macho_reloc.o, __data) - anon_minuend_long1 + 2)[31:0] +# jitlink-check: *{4}anon_minuend_long1 = (section_addr(macho_reloc.o, __data) - anon_minuend_long1 - 2)[31:0] .globl anon_minuend_long1 .p2align 2 anon_minuend_long1: - .long Lanon_data - anon_minuend_long1 + 2 + .long Lanon_data - anon_minuend_long1 - 2 # Check X86_64_RELOC_SUBTRACTOR Quad/Long in named storage with minuend and subtrahend. # Both forms "A: .quad A - B + C" and "A: .quad B - A + C" are tested. 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll index cfa91d4da633d..c842f1963d691 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S -o %t.ll +; RUN: opt < %s -asan -enable-new-pm=0 -S -o %t.ll +; RUN: opt < %s -passes='asan-function-pipeline' -S -o %t.ll ; RUN: FileCheck %s < %t.ll ; RUN: llc < %t.ll | FileCheck %s --check-prefix=ASM diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll index 7827f3fbf278a..191917581b748 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S -o %t.ll +; RUN: opt < %s -asan -enable-new-pm=0 -S -o %t.ll +; RUN: opt < %s -passes='asan-function-pipeline' -S -o %t.ll ; RUN: FileCheck %s < %t.ll ; Don't do stack malloc on functions containing inline assembly on 64-bit diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll index 027148a0acd6d..bf9cc11a2b903 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | llc -o /dev/null +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | llc -o /dev/null +; RUN: opt < %s -passes='asan-pipeline' -S | llc -o /dev/null ; The bug manifests as a reg alloc failure: ; error: ran out of registers during register allocation ; ModuleID = 'z.o' diff --git a/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll 
b/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll index 2c8df25e53c1c..749f4ad220158 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll index 3df73e54803ff..5d746c3053c68 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll @@ -1,8 +1,14 @@ -; RUN: opt < %s -asan -asan-detect-invalid-pointer-cmp -S \ +; RUN: opt < %s -asan -asan-detect-invalid-pointer-cmp -S -enable-new-pm=0 \ ; RUN: | FileCheck %s --check-prefixes=CMP,NOSUB,ALL -; RUN: opt < %s -asan -asan-detect-invalid-pointer-sub -S \ +; RUN: opt < %s -passes='asan-function-pipeline' -asan-detect-invalid-pointer-cmp -S \ +; RUN: | FileCheck %s --check-prefixes=CMP,NOSUB,ALL +; RUN: opt < %s -asan -asan-detect-invalid-pointer-sub -S -enable-new-pm=0 \ +; RUN: | FileCheck %s --check-prefixes=SUB,NOCMP,ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-detect-invalid-pointer-sub -S \ ; RUN: | FileCheck %s --check-prefixes=SUB,NOCMP,ALL -; 
RUN: opt < %s -asan -asan-detect-invalid-pointer-pair -S \ +; RUN: opt < %s -asan -asan-detect-invalid-pointer-pair -S -enable-new-pm=0 \ +; RUN: | FileCheck %s --check-prefixes=CMP,SUB,ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-detect-invalid-pointer-pair -S \ ; RUN: | FileCheck %s --check-prefixes=CMP,SUB,ALL ; Support instrumentation of invalid pointer pair detection. diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll index 1b069255880a9..7a3b69bac2f26 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll @@ -1,10 +1,18 @@ -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -S \ +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -S -enable-new-pm=0 \ ; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=STORE -check-prefix=ALL -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -S \ +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -S \ +; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=STORE -check-prefix=ALL +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -S -enable-new-pm=0 \ +; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=STORE -check-prefix=ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -S \ ; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=STORE -check-prefix=ALL -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-writes=0 -S \ +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-writes=0 -S -enable-new-pm=0 \ ; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=NOSTORE -check-prefix=ALL -; RUN: opt < %s -asan 
-asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \ +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-instrument-writes=0 -S \ +; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=NOSTORE -check-prefix=ALL +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S -enable-new-pm=0 \ +; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=NOSTORE -check-prefix=ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \ ; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=NOSTORE -check-prefix=ALL ; Support ASan instrumentation for constant-mask llvm.masked.{load,store} diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll index f9cfa7af19f62..39e705354fe71 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -gvn -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -basic-aa -gvn -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s "-passes=function(require,gvn),asan-pipeline" -S | FileCheck %s ; ASAN conflicts with load widening iff the widened load accesses data out of bounds ; (while the original unwidened loads do not). 
; https://github.com/google/sanitizers/issues/20#issuecomment-136381262 diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll b/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll index 87d72bbe142f7..35744a810ccc7 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll b/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll index 2d59b31ef917b..2019011581de9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll @@ -1,6 +1,7 @@ ; Test basic address sanitizer instrumentation. ; -; RUN: opt -asan -asan-module -S < %s | FileCheck %s +; RUN: opt -asan -asan-module -enable-new-pm=0 -S < %s | FileCheck %s +; RUN: opt -passes='asan-pipeline' -S < %s | FileCheck %s target triple = "x86_64-pc-windows-msvc" ; CHECK: @llvm.global_ctors = {{.*}}@asan.module_ctor diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll b/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll index 9ece86e66f923..fb234ff0014f4 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll @@ -1,6 +1,7 @@ ; Test basic address sanitizer instrumentation for Myriad. 
; -; RUN: opt -asan -asan-module -S < %s | FileCheck %s +; RUN: opt -asan -asan-module -enable-new-pm=0 -S < %s | FileCheck %s +; RUN: opt -passes='asan-pipeline' -S < %s | FileCheck %s target triple = "sparc-myriad-rtems" target datalayout = "E-m:e-p:32:32-i64:64-f128:64-n32-S64" diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic.ll b/llvm/test/Instrumentation/AddressSanitizer/basic.ll index 6397338344d6d..32462bb730c6a 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/basic.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/basic.ll @@ -1,11 +1,10 @@ ; Test basic address sanitizer instrumentation. ; -; RUN: opt < %s -asan -asan-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S -enable-new-pm=0 | FileCheck --check-prefixes=CHECK,CHECK-S5 %s -; We need the requires since both asan and asan-module require reading module level metadata which is done once by the asan-globals-md analysis -; RUN: opt < %s -passes='require,function(asan),module(asan-module)' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -passes='require,function(asan),module(asan-module)' -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll 
index a070cedca37d9..e2d2464013a06 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s ; Test that for call instructions, the by-value arguments are instrumented. target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll index 48cda7d7f48c2..2fe3790af558f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll @@ -3,7 +3,8 @@ ; first instruction. Breaking on the instrumented function in a debugger ; would then stop at that instruction, before the prologue is finished. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s ; 1: void f(int *arg) { ; 2: } ; 3: int main(int argc, char **argv) { diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll b/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll index 959693e086fac..50bba89f88898 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s source_filename = "version.c" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.12.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll index c0389daddacd4..ce0126a08c19b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -S | FileCheck %s ; Checks that llvm.dbg.declare instructions are updated ; accordingly as we merge allocas. 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll index 911ef6de32db1..745ef165ecb0a 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll @@ -2,8 +2,10 @@ ; Only first-basic-block allocas are considered stack slots, and moving them ; breaks debug info. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-instrument-dynamic-allocas -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll index b6d393d0c33f0..69a4238342cd5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll @@ -1,7 +1,9 @@ ; Make sure we don't break the IR when moving non-instrumented allocas -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' 
-asan-instrument-dynamic-allocas -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll index 7d15cd9537f17..3fc42e256b3bc 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll @@ -1,6 +1,7 @@ ; This test checks that we are not instrumenting unnecessary globals ; (llvm.metadata and other llvm internal globals). -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll index fc0e676ec1391..c946c6d4ac27d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll @@ -1,6 +1,7 @@ ; This test checks that we are not instrumenting unnecessary globals ; (llvm.metadata, init_array sections, and other llvm internal globals). 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll index cff83ab718bbb..f93d3e2ff9223 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll @@ -1,6 +1,7 @@ ; This test checks that we are not instrumenting globals ; that we created ourselves. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll index f20977fc98831..2d0b952ece2d4 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll @@ -1,5 +1,6 @@ ; This test checks that we don't instrument globals created by profiling passes. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s @__profc_test = private global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8 @__llvm_gcov_ctr = internal global [1 x i64] zeroinitializer diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll index 68913d321fe15..05b701731dff5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-instrument-dynamic-allocas -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll index b4407a8fc6df8..81b7ef7e0a37d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll @@ -1,5 +1,6 @@ ; This test checks that we are not instrumenting sanitizer code. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll index 24141ee2190c7..545adcc0d7c0c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll @@ -1,6 +1,7 @@ ; This test checks that we instrument regular globals, but do not touch ; the COMDAT ones. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" target triple = "i686-pc-windows-msvc" ; no action should be taken for these globals diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll index bdcd6595a0f61..8379f634b2e13 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll @@ -1,6 +1,7 @@ ; This test checks that we instrument regular globals, but do not touch ; the linkonce_odr ones. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" ; no action should be taken for these globals diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll index f863f44d51256..9b222452e1c5e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" ; no action should be taken for thread locals diff --git a/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll index 391693c2c6f62..0e6b2c45d5108 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll @@ -1,15 +1,26 @@ ; Test using dynamic shadow address on darwin ; -; RUN: opt -asan -asan-module -mtriple=arm64_32-apple-watchos --data-layout="e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=armv7k-apple-watchos --data-layout="e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" -S < %s | FileCheck %s 
--check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=arm64-apple-ios --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 -; RUN: opt -asan -asan-module -mtriple=armv7s-apple-ios --data-layout="e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=i386-apple-watchos-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -asan -asan-module -mtriple=arm64_32-apple-watchos --data-layout="e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=arm64_32-apple-watchos --data-layout="e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=armv7k-apple-watchos --data-layout="e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=armv7k-apple-watchos --data-layout="e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=arm64-apple-ios --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s -enable-new-pm=0 | 
FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=arm64-apple-ios --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -asan -asan-module -mtriple=armv7s-apple-ios --data-layout="e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=armv7s-apple-ios --data-layout="e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=i386-apple-watchos-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=i386-apple-watchos-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 ; -; // macOS does not use dynamic 
shadow placement -; RUN: opt -asan -asan-module -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 +; // macOS does not use dynamic shadow placement on x86_64 +; RUN: opt -asan -asan-module -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 +; // macOS does use dynamic shadow placement on arm64 +; RUN: opt -asan -asan-module -mtriple=arm64-apple-macosx --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=arm64-apple-macosx --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 define i32 @test_load(i32* %a) sanitize_address { ; First instrumentation in the function must be to load the dynamic shadow diff --git a/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll b/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll index 0e339cc8041a3..4215d54574132 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll @@ -1,6 +1,7 @@ ; Test optimization experiments. ; -asan-force-experiment flag turns all memory accesses into experiments. 
-; RUN: opt < %s -asan -asan-module -asan-force-experiment=42 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-force-experiment=42 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-force-experiment=42 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/experiment.ll b/llvm/test/Instrumentation/AddressSanitizer/experiment.ll index aaa125f5d4086..274e0fdd136c3 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/experiment.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/experiment.ll @@ -1,6 +1,7 @@ ; Test optimization experiments. ; -asan-force-experiment flag turns all memory accesses into experiments. -; RUN: opt < %s -asan -asan-module -asan-force-experiment=42 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-force-experiment=42 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-force-experiment=42 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll b/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll index e364c23253f2f..70927ec2f0f75 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll @@ -1,7 +1,9 @@ ; Test -asan-force-dynamic-shadow flag. 
; -; RUN: opt -asan -asan-module -S -asan-force-dynamic-shadow=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FDS -; RUN: opt -asan -asan-module -S -asan-force-dynamic-shadow=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NDS +; RUN: opt -asan -asan-module -enable-new-pm=0 -S -asan-force-dynamic-shadow=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FDS +; RUN: opt -passes='asan-pipeline' -S -asan-force-dynamic-shadow=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FDS +; RUN: opt -asan -asan-module -enable-new-pm=0 -S -asan-force-dynamic-shadow=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NDS +; RUN: opt -passes='asan-pipeline' -S -asan-force-dynamic-shadow=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NDS target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll b/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll index 3fbbfa3cb1ea6..b9ec93ee266e0 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll @@ -1,14 +1,29 @@ -; RUN: opt < %s -asan -asan-module -S \ +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 \ ; RUN: -mtriple=i386-unknown-freebsd \ ; RUN: -data-layout="e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" | \ ; RUN: FileCheck --check-prefix=CHECK-32 %s -; RUN: opt < %s -asan -asan-module -S \ +; RUN: opt < %s -passes='asan-pipeline' -S \ +; RUN: -mtriple=i386-unknown-freebsd \ +; RUN: -data-layout="e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" | \ +; RUN: FileCheck --check-prefix=CHECK-32 %s + +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 \ +; RUN: -mtriple=x86_64-unknown-freebsd \ +; RUN: -data-layout="e-m:e-i64:64-f80:128-n8:16:32:64-S128" | \ +; RUN: FileCheck --check-prefix=CHECK-64 %s + +; RUN: opt < %s -passes='asan-pipeline' -S \ ; RUN: -mtriple=x86_64-unknown-freebsd \ ; RUN: -data-layout="e-m:e-i64:64-f80:128-n8:16:32:64-S128" | \ ; RUN: FileCheck --check-prefix=CHECK-64 %s -; 
RUN: opt < %s -asan -asan-module -S \ +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 \ +; RUN: -mtriple=mips64-unknown-freebsd \ +; RUN: -data-layout="E-m:e-i64:64-n32:64-S128" | \ +; RUN: FileCheck --check-prefix=CHECK-MIPS64 %s + +; RUN: opt < %s -passes='asan-pipeline' -S \ ; RUN: -mtriple=mips64-unknown-freebsd \ ; RUN: -data-layout="E-m:e-i64:64-n32:64-S128" | \ ; RUN: FileCheck --check-prefix=CHECK-MIPS64 %s diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll b/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll index 19b76e6512511..fbc7ae1f09a78 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll @@ -1,7 +1,8 @@ ; Only verify that asan don't crash on global variables of different ; address space. The global variable should be unmodified by asan. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll index 0fc3205ba492c..71d83eccb334f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll @@ -1,5 +1,6 @@ ; This test checks that instrumented global C (null terminated) strings are put into a special section on Darwin. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll b/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll index aa02553f4416a..304dfb6d0496e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -constmerge -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -constmerge -S | FileCheck %s +; RUN: opt < %s "-passes=asan-pipeline,constmerge" -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll index 4a6f426443769..25033599b62d6 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s 
-passes='asan-pipeline' -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -28,10 +30,6 @@ target triple = "x86_64-unknown-linux-gnu" ; during LTO. ; CHECK: @llvm.compiler.used {{.*}} @__asan_global_global {{.*}} section "llvm.metadata" -; Check that start and stop symbols will be accessed as dso_local. -; CHECK: @__start_asan_globals = external hidden global i64 -; CHECK: @__stop_asan_globals = external hidden global i64 - ; Check that location descriptors and global names were passed into __asan_register_globals: ; CHECK: call void @__asan_register_elf_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64), i64 ptrtoint (i64* @__start_asan_globals to i64), i64 ptrtoint (i64* @__stop_asan_globals to i64)) diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll index b1a600f3ceb8d..f5b9e4c2408dd 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll @@ -1,7 +1,11 @@ -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s 
+; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll index 324a04e3b8321..3b4c8444feaf0 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll @@ -1,7 +1,8 @@ ; Test that the compiler doesn't crash when the llvm.asan.globals containts ; an entry that points to a BitCast instruction. 
-; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll index 1723b33636226..2790ff6fc7499 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll @@ -2,7 +2,8 @@ ; allowing dead stripping to be performed, and that the appropriate runtime ; routines are invoked. -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll index 29725adcd039a..30f2f12f33512 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -mtriple=x86_64-linux -asan -asan-module -asan-globals-live-support=0 -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux -passes='asan-pipeline' -asan-globals-live-support=0 -S | FileCheck %s $my_var = comdat any diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll index 744366e4c3766..628f32df36219 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll @@ -4,7 +4,8 @@ ; FIXME: Later we can use this to instrument linkonce odr string literals. -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.0.24215" diff --git a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll index 1414b2122d983..821bfc86a0e5b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return -S | FileCheck %s ; Source (-O0 -fsanitize=address -fsanitize-address-use-after-scope): ;; struct S { int x, y; }; diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll index 6fc52bb66ded4..434e4be4e8e6d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll @@ -1,7 +1,8 @@ ; Test asan internal compiler flags: ; -asan-instrument-dynamic-allocas -; RUN: opt < %s 
-asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s --check-prefix=CHECK-ALLOCA +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s --check-prefix=CHECK-ALLOCA +; RUN: opt < %s -passes='asan-pipeline' -asan-instrument-dynamic-allocas -S | FileCheck %s --check-prefix=CHECK-ALLOCA target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll index 22ee66301de25..b255a15411ed1 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s ; AddressSanitizer must insert __asan_handle_no_return ; before noreturn calls that aren't inserted by sanitizers. diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll index f33481112f872..e563f702749b8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll @@ -1,6 +1,8 @@ ; This test checks that we are not instrumenting direct inbound stack accesses. 
-; RUN: opt < %s -asan -asan-module -asan-opt-stack -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-opt-stack -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-opt-stack -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-opt-stack -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-opt-stack -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-opt-stack -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll index 5631572d93da2..18c40a503b5c5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @xxx = global i32 0, align 4 diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll index d392662efc711..ae043dc0c2c59 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @xxx = internal global i32 0, align 4 ; With dynamic initializer. diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll index 8341697ff48c9..ea350dab4e3b8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll @@ -1,6 +1,8 @@ ; Test that AddressSanitizer instruments "(*a)++" only once. 
-; RUN: opt < %s -asan -asan-module -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1 -; RUN: opt < %s -asan -asan-module -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0 +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1 +; RUN: opt < %s -passes='asan-pipeline' -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1 +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0 +; RUN: opt < %s -passes='asan-pipeline' -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll b/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll index 8e0275d2c17de..82a61aabea4d9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll @@ -2,11 +2,16 @@ ; -asan-instrumentation-with-call-threshold ; -asan-memory-access-callback-prefix -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=1 -S | FileCheck %s --check-prefix=CHECK-CALL -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE -; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=1 -S | 
FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=1 -S | FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s --check-prefix=CHECK-INLINE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll b/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll index 4bb59e74e8f14..7a3fbc39ff271 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll @@ -1,7 +1,8 @@ ; Test asan internal compiler flags: ; -asan-recover=1 -; RUN: opt < %s -asan -asan-recover -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-recover -asan-module -S | FileCheck %s 
+; RUN: opt < %s -passes='asan-pipeline' -asan-recover -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll index ff03d10c7c5d7..64473fb59f76e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll @@ -1,5 +1,6 @@ ; Test handling of llvm.lifetime intrinsics with C++ exceptions. -; RUN: opt < %s -asan -asan-module -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll index 437b6a94185b8..136a8457fcf19 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll @@ -1,8 +1,12 @@ ; Test handling of llvm.lifetime intrinsics in UAR/UAS modes. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-use-after-scope=0 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=1 -asan-use-after-scope=0 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS -; RUN: opt < %s -asan -asan-module -asan-use-after-return=1 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=1 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=1 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=1 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=1 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll index b951afdc670f1..26aa65715d262 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll @@ -1,6 +1,8 @@ ; Test handling of llvm.lifetime intrinsics. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT -; RUN: opt < %s -asan -asan-module -asan-use-after-scope -asan-use-after-return=0 -asan-instrument-dynamic-allocas=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-NO-DYNAMIC +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope -asan-use-after-return=0 -asan-instrument-dynamic-allocas=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-NO-DYNAMIC +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope -asan-use-after-return=0 -asan-instrument-dynamic-allocas=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-NO-DYNAMIC target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll index 9b95bb3fa6b04..a4c5803fc8189 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll @@ -1,7 +1,11 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR -; RUN: opt < %s -asan -asan-module -asan-use-private-alias=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-NOINDICATOR -; RUN: opt < %s -asan -asan-module -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-INDICATOR,CHECK-NOALIAS -; RUN: opt < %s -asan -asan-module -asan-use-private-alias=1 -asan-use-odr-indicator=1 -S | FileCheck %s 
--check-prefixes=CHECK-ALIAS,CHECK-INDICATOR +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-private-alias=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -passes='asan-pipeline' -asan-use-private-alias=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-INDICATOR,CHECK-NOALIAS +; RUN: opt < %s -passes='asan-pipeline' -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-INDICATOR,CHECK-NOALIAS +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-private-alias=1 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-INDICATOR +; RUN: opt < %s -passes='asan-pipeline' -asan-use-private-alias=1 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-INDICATOR target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll index 7bf294cb6b600..c9dec38d227bc 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -asan -asan-skip-promotable-allocas=0 %s -o - | FileCheck %s +; RUN: opt -S -asan -enable-new-pm=0 -asan-skip-promotable-allocas=0 %s -o - | FileCheck %s +; RUN: opt -S -passes='asan-function-pipeline' -asan-skip-promotable-allocas=0 %s -o - | FileCheck %s ; Generated from: ; int bar(int y) { ; return y + 2; diff --git a/llvm/test/Instrumentation/AddressSanitizer/localescape.ll 
b/llvm/test/Instrumentation/AddressSanitizer/localescape.ll index 015b0e84ff16c..8daeb2927f935 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/localescape.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/localescape.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll b/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll index 30388b1865eb5..ea84ac387a71f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll @@ -1,5 +1,6 @@ ; A module with no asan-instrumented globals has no asan destructor, and has an asan constructor in a comdat. 
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -asan -asan-module -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -asan -asan-module -enable-new-pm=0 -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -passes='asan-pipeline' -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s define void @f() { ret void diff --git a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll index cf48d19c16c2d..09b3d2f519297 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/ps4.ll b/llvm/test/Instrumentation/AddressSanitizer/ps4.ll index e160996866b4f..5930e31a4dd79 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/ps4.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/ps4.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S -mtriple=x86_64-scei-ps4 | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S -mtriple=x86_64-scei-ps4 | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S -mtriple=x86_64-scei-ps4 | FileCheck %s define i32 @read_4_bytes(i32* %a) sanitize_address { entry: diff --git a/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll b/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll index 8345586fec4e8..f0b8fb8fc8374 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll @@ 
-1,8 +1,11 @@ ; Test that the scale (-asan-mapping-scale) and offset (-asan-mapping-offset) command-line options work as expected ; -; RUN: opt < %s -asan -asan-module -asan-mapping-offset 0xdeadbeef -S | FileCheck --check-prefix=CHECK-OFFSET %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s -; RUN: opt < %s -asan -asan-module -asan-mapping-offset 0xc0ffee -asan-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-offset 0xdeadbeef -S | FileCheck --check-prefix=CHECK-OFFSET %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-offset 0xdeadbeef -S | FileCheck --check-prefix=CHECK-OFFSET %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-offset 0xc0ffee -asan-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-offset 0xc0ffee -asan-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s target triple = "x86_64-unknown-linux-gnu" define i32 @read_offset(i32* %a) sanitize_address { diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll index 2261094275123..a9181fc4c3de9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll @@ -1,8 +1,10 @@ ; Regular stack poisoning. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s ; Stack poisoning with stack-use-after-scope. -; RUN: opt < %s -asan -asan-module -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll index 5523da63d373b..54128bb0c9e4c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll @@ -1,8 +1,10 @@ ; Regular stack poisoning. -; RUN: opt < %s -asan -asan-module -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s ; Stack poisoning with stack-use-after-scope. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll index 859404b12de2e..616c18ea09d60 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll @@ -1,8 +1,12 @@ ; This check verifies that arguments passed by value get redzones. -; RUN: opt < %s -asan -asan-realign-stack=32 -S | FileCheck %s -; RUN: opt < %s -asan -asan-realign-stack=32 -asan-force-dynamic-shadow -S | FileCheck %s -; RUN: opt < %s -asan -asan-realign-stack=32 -asan-mapping-scale=5 -S | FileCheck %s -; RUN: opt < %s -asan -asan-realign-stack=32 -asan-force-dynamic-shadow -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -asan-force-dynamic-shadow -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -asan-force-dynamic-shadow -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -asan-force-dynamic-shadow 
-asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -asan-force-dynamic-shadow -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll index 4e5c2958ddb8f..0505f9a1e0920 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll index 90ddd7786b9bf..6140ba6b7a80e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll @@ -1,6 +1,10 @@ ; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca \ +; RUN: -asan-use-after-return -S -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca 
\ ; RUN: -asan-use-after-return -S | FileCheck %s ; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca -asan-mapping-scale=5 \ +; RUN: -asan-use-after-return -S -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca -asan-mapping-scale=5 \ ; RUN: -asan-use-after-return -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll index 85169d523b685..58b6714c90533 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll @@ -1,8 +1,12 @@ ; Test the ASan's stack layout. ; More tests in tests/Transforms/Utils/ASanStackFrameLayoutTest.cpp -; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=0 -asan-use-after-scope -S \ +; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=0 -asan-use-after-scope -S -enable-new-pm=0 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STATIC -; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=1 -asan-use-after-scope -S \ +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca=0 -asan-use-after-scope -S \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STATIC +; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=1 -asan-use-after-scope -S -enable-new-pm=0 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca=1 -asan-use-after-scope -S \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll b/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll index dd0132d435bd2..446e7f6793ba6 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll @@ -1,6 +1,7 @@ ; Test marking string functions as nobuiltin in address sanitizer. ; -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/test64.ll b/llvm/test/Instrumentation/AddressSanitizer/test64.ll index 4aab5310635fd..c6b190c861a1e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/test64.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/test64.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" define i32 @read_4_bytes(i32* %a) sanitize_address { diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/twice.ll b/llvm/test/Instrumentation/AddressSanitizer/twice.ll index 9f7826f739521..4b5b64080dd29 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/twice.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/twice.ll @@ -1,5 +1,6 @@ ; Check that the address sanitizer pass can be reused -; RUN: opt < %s -S -run-twice -asan +; RUN: opt < %s -S -run-twice -asan -enable-new-pm=0 +; RUN: opt < %s -S -run-twice -passes='asan-function-pipeline' define void @foo(i64* %b) nounwind uwtable sanitize_address { entry: diff --git a/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll b/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll index 23b7ef4537c4f..41fab72ddbea9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll @@ -1,6 +1,7 @@ ; ASan shouldn't instrument code added by UBSan. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll b/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll index 85f759cf740c1..a384186255f02 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s ; All of these globals should pass through uninstrumented because of their ; custom section name. 
The .CRT section is the standard way to register custom diff --git a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll index 4d5126be87c60..3eb98508fec6b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s ; Generated like so: ; $ clang -S -emit-llvm -Xclang -disable-llvm-passes -fsanitize=address -O1 t.cpp -o t.ll diff --git a/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll b/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll index 4771a9674017b..ed5df2a2dfb05 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll @@ -1,18 +1,30 @@ ; Test -asan-with-ifunc flag. 
; -; RUN: opt -asan -asan-module -S -asan-with-ifunc=0 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=0 < %s -enable-new-pm=0 | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 < %s | \ +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=1 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=1 < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC-NOREMAT +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=1 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC-NOREMAT ; Pre-Lollipop Android does not support ifunc. 
-; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android20 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android20 < %s -enable-new-pm=0 | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android < %s | \ +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android20 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android21 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android21 < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android21 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Instrumentation/MemorySanitizer/experimental-reduce.ll b/llvm/test/Instrumentation/MemorySanitizer/experimental-reduce.ll new file mode 100644 index 0000000000000..e81333b515e25 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/experimental-reduce.ll @@ -0,0 +1,68 @@ + +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S 
-passes='module(msan-module),function(msan)' 2>&1 | \ +; RUN: FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,CHECK-ORIGINS %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @llvm.experimental.vector.reduce.add(<3 x i32>) +declare i32 @llvm.experimental.vector.reduce.and(<3 x i32>) +declare i32 @llvm.experimental.vector.reduce.or(<3 x i32>) + +; CHECK-LABEL: @reduce_add +define i32 @reduce_add() sanitize_memory { +; CHECK: [[P:%.*]] = inttoptr i64 0 to <3 x i32>* + %p = inttoptr i64 0 to <3 x i32> * +; CHECK: [[O:%.*]] = load <3 x i32>, <3 x i32>* [[P]] + %o = load <3 x i32>, <3 x i32> *%p +; CHECK: [[O_SHADOW:%.*]] = load <3 x i32>, <3 x i32>* +; CHECK: [[O_ORIGIN:%.*]] = load i32, i32* +; CHECK: [[R_SHADOW:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) +; CHECK: [[R:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> [[O]]) + %r = call i32 @llvm.experimental.vector.reduce.add(<3 x i32> %o) +; CHECK: store i32 [[R_SHADOW]], {{.*}} @__msan_retval_tls +; CHECK: store i32 [[O_ORIGIN]], {{.*}} @__msan_retval_origin_tls +; CHECK: ret i32 [[R]] + ret i32 %r +} + +; CHECK-LABEL: @reduce_and +define i32 @reduce_and() sanitize_memory { +; CHECK: [[P:%.*]] = inttoptr i64 0 to <3 x i32>* + %p = inttoptr i64 0 to <3 x i32> * +; CHECK: [[O:%.*]] = load <3 x i32>, <3 x i32>* [[P]] + %o = load <3 x i32>, <3 x i32> *%p +; CHECK: [[O_SHADOW:%.*]] = load <3 x i32>, <3 x i32>* +; CHECK: [[O_ORIGIN:%.*]] = load i32, i32* +; CHECK: [[O_SHADOW_1:%.*]] = or <3 x i32> [[O]], [[O_SHADOW]] +; CHECK: [[O_SHADOW_2:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> [[O_SHADOW_1]] +; CHECK: [[O_SHADOW_3:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) +; CHECK: [[R_SHADOW:%.*]] = and i32 
[[O_SHADOW_2]], [[O_SHADOW_3]] +; CHECK: [[R:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> [[O]]) + %r = call i32 @llvm.experimental.vector.reduce.and(<3 x i32> %o) +; CHECK: store i32 [[R_SHADOW]], {{.*}} @__msan_retval_tls +; CHECK: store i32 [[O_ORIGIN]], {{.*}} @__msan_retval_origin_tls +; CHECK: ret i32 [[R]] + ret i32 %r +} + +; CHECK-LABEL: @reduce_or +define i32 @reduce_or() sanitize_memory { +; CHECK: [[P:%.*]] = inttoptr i64 0 to <3 x i32>* + %p = inttoptr i64 0 to <3 x i32> * +; CHECK: [[O:%.*]] = load <3 x i32>, <3 x i32>* [[P]] + %o = load <3 x i32>, <3 x i32> *%p +; CHECK: [[O_SHADOW:%.*]] = load <3 x i32>, <3 x i32>* +; CHECK: [[O_ORIGIN:%.*]] = load i32, i32* +; CHECK: [[NOT_O:%.*]] = xor <3 x i32> [[O]], +; CHECK: [[O_SHADOW_1:%.*]] = or <3 x i32> [[NOT_O]], [[O_SHADOW]] +; CHECK: [[O_SHADOW_2:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> [[O_SHADOW_1]] +; CHECK: [[O_SHADOW_3:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O_SHADOW]]) +; CHECK: [[R_SHADOW:%.*]] = and i32 [[O_SHADOW_2]], [[O_SHADOW_3]] +; CHECK: [[R:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v3i32(<3 x i32> [[O]]) + %r = call i32 @llvm.experimental.vector.reduce.or(<3 x i32> %o) +; CHECK: store i32 [[R_SHADOW]], {{.*}} @__msan_retval_tls +; CHECK: store i32 [[O_ORIGIN]], {{.*}} @__msan_retval_origin_tls +; CHECK: ret i32 [[R]] + ret i32 %r +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll new file mode 100644 index 0000000000000..1c203177796e1 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -msan-eager-checks -S -passes='module(msan-module),function(msan)' 2>&1 | \ +; RUN: FileCheck -allow-deprecated-dag-overlap 
-check-prefixes=CHECK,CHECK-ORIGINS %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define noundef i32 @NormalRet() nounwind uwtable sanitize_memory { +; CHECK-LABEL: @NormalRet( +; CHECK-NEXT: ret i32 123 +; + ret i32 123 +} + +define i32 @PartialRet() nounwind uwtable sanitize_memory { +; CHECK-LABEL: @PartialRet( +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 123 +; + ret i32 123 +} + +define noundef i32 @LoadedRet() nounwind uwtable sanitize_memory { +; CHECK-LABEL: @LoadedRet( +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 0 to i32* +; CHECK-NEXT: [[O:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[P]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32* +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, i32* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSLD]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof !0 +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #1 +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: ret i32 [[O]] +; + %p = inttoptr i64 0 to i32 * + %o = load i32, i32 *%p + ret i32 %o +} + + +define void @NormalArg(i32 noundef %a) nounwind uwtable sanitize_memory { +; CHECK-LABEL: @NormalArg( +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 0 to i32* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[P]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: 
[[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32* +; CHECK-NEXT: store i32 0, i32* [[TMP3]], align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: ret void +; + %p = inttoptr i64 0 to i32 * + store i32 %a, i32 *%p + ret void +} + +define void @PartialArg(i32 %a) nounwind uwtable sanitize_memory { +; CHECK-LABEL: @PartialArg( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* bitcast ([100 x i64]* @__msan_param_tls to i32*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK-NEXT: [[P:%.*]] = inttoptr i64 0 to i32* +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[P]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32* +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to i32* +; CHECK-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof !0 +; CHECK: 8: +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP7]], align 4 +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: ret void +; + %p = inttoptr i64 0 to i32 * + store i32 %a, i32 *%p + ret void +} + +define void @CallNormal() nounwind uwtable sanitize_memory { +; CHECK-LABEL: @CallNormal( +; CHECK-NEXT: [[R:%.*]] = call i32 @NormalRet() #0 +; CHECK-NEXT: call void @NormalArg(i32 [[R]]) #0 +; CHECK-NEXT: ret void +; + %r = call i32 @NormalRet() nounwind uwtable sanitize_memory + call void @NormalArg(i32 %r) nounwind uwtable sanitize_memory + ret void +} + +define void @CallWithLoaded() nounwind uwtable sanitize_memory { +; CHECK-LABEL: @CallWithLoaded( 
+; CHECK-NEXT: [[P:%.*]] = inttoptr i64 0 to i32* +; CHECK-NEXT: [[O:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[P]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32* +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, i32* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSLD]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof !0 +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #1 +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: call void @NormalArg(i32 [[O]]) #0 +; CHECK-NEXT: ret void +; + %p = inttoptr i64 0 to i32 * + %o = load i32, i32 *%p + call void @NormalArg(i32 %o) nounwind uwtable sanitize_memory + ret void +} + +define void @CallPartial() nounwind uwtable sanitize_memory { +; CHECK-LABEL: @CallPartial( +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: [[R:%.*]] = call i32 @PartialRet() #0 +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store i32 [[_MSRET]], i32* bitcast ([100 x i64]* @__msan_param_tls to i32*), align 8 +; CHECK-NEXT: store i32 [[TMP1]], i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK-NEXT: call void @PartialArg(i32 [[R]]) #0 +; CHECK-NEXT: ret void +; + %r = call i32 @PartialRet() nounwind uwtable sanitize_memory + call void @PartialArg(i32 %r) nounwind uwtable sanitize_memory + ret void +} diff --git a/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll 
b/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll index 33c4f3ab302c5..f121b0dcaf0d7 100644 --- a/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll +++ b/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -tsan -S | FileCheck %s +; RUN: opt < %s -tsan -S | FileCheck --check-prefixes=CHECK,CHECK-OPT %s ; RUN: opt < %s -tsan -tsan-instrument-read-before-write -S | FileCheck %s --check-prefixes=CHECK,CHECK-UNOPT +; RUN: opt < %s -tsan -tsan-compound-read-before-write -S | FileCheck %s --check-prefixes=CHECK,CHECK-COMPOUND +; RUN: opt < %s -tsan -tsan-distinguish-volatile -tsan-compound-read-before-write -S | FileCheck %s --check-prefixes=CHECK,CHECK-COMPOUND-VOLATILE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -10,10 +12,13 @@ entry: store i32 %inc, i32* %ptr, align 4 ret void } -; CHECK: define void @IncrementMe -; CHECK-NOT: __tsan_read -; CHECK-UNOPT: __tsan_read -; CHECK: __tsan_write +; CHECK-LABEL: define void @IncrementMe +; CHECK-OPT-NOT: __tsan_read4 +; CHECK-COMPOUND-NOT: __tsan_read4 +; CHECK-UNOPT: __tsan_read4 +; CHECK-OPT: __tsan_write4 +; CHECK-UNOPT: __tsan_write4 +; CHECK-COMPOUND: __tsan_read_write4 ; CHECK: ret void define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable sanitize_thread { @@ -25,10 +30,52 @@ entry: ret void } -; CHECK: define void @IncrementMeWithCallInBetween -; CHECK: __tsan_read -; CHECK: __tsan_write +; CHECK-LABEL: define void @IncrementMeWithCallInBetween +; CHECK: __tsan_read4 +; CHECK: __tsan_write4 ; CHECK: ret void declare void @foo() +define void @VolatileLoad(i32* nocapture %ptr) nounwind uwtable sanitize_thread { +entry: + %0 = load volatile i32, i32* %ptr, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %ptr, align 4 + ret void +} +; CHECK-LABEL: define void @VolatileLoad +; 
CHECK-COMPOUND-NOT: __tsan_read4 +; CHECK-COMPOUND-VOLATILE: __tsan_volatile_read4 +; CHECK-COMPOUND: __tsan_read_write4 +; CHECK-COMPOUND-VOLATILE: __tsan_write4 +; CHECK: ret void + +define void @VolatileStore(i32* nocapture %ptr) nounwind uwtable sanitize_thread { +entry: + %0 = load i32, i32* %ptr, align 4 + %inc = add nsw i32 %0, 1 + store volatile i32 %inc, i32* %ptr, align 4 + ret void +} +; CHECK-LABEL: define void @VolatileStore +; CHECK-COMPOUND-NOT: __tsan_read4 +; CHECK-COMPOUND-VOLATILE: __tsan_read4 +; CHECK-COMPOUND: __tsan_read_write4 +; CHECK-COMPOUND-VOLATILE: __tsan_volatile_write4 +; CHECK: ret void + +define void @VolatileBoth(i32* nocapture %ptr) nounwind uwtable sanitize_thread { +entry: + %0 = load volatile i32, i32* %ptr, align 4 + %inc = add nsw i32 %0, 1 + store volatile i32 %inc, i32* %ptr, align 4 + ret void +} +; CHECK-LABEL: define void @VolatileBoth +; CHECK-COMPOUND-NOT: __tsan_read4 +; CHECK-COMPOUND-VOLATILE: __tsan_volatile_read4 +; CHECK-COMPOUND: __tsan_read_write4 +; CHECK-COMPOUND-VOLATILE: __tsan_volatile_write4 +; CHECK: ret void + diff --git a/llvm/test/MC/AArch64/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/directive-arch_extension-negative.s index 61351e171b599..e516046d8edf7 100644 --- a/llvm/test/MC/AArch64/directive-arch_extension-negative.s +++ b/llvm/test/MC/AArch64/directive-arch_extension-negative.s @@ -78,3 +78,8 @@ at s1e1wp, x2 dc cvap, x7 // CHECK: error: DC CVAP requires ccpp // CHECK-NEXT: dc cvap, x7 + +.arch_extension norcpc +ldapr x0, [x1] +// CHECK: error: instruction requires: rcpc +// CHECK-NEXT: ldapr x0, [x1] diff --git a/llvm/test/MC/AArch64/directive-arch_extension.s b/llvm/test/MC/AArch64/directive-arch_extension.s index 6f1a651134623..e351526c871b8 100644 --- a/llvm/test/MC/AArch64/directive-arch_extension.s +++ b/llvm/test/MC/AArch64/directive-arch_extension.s @@ -59,3 +59,7 @@ at s1e1wp, x2 .arch_extension ccpp dc cvap, x7 // CHECK: dc cvap, x7 + +.arch_extension rcpc +ldapr x0, 
[x1] +// CHECK: ldapr x0, [x1] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s index 99fcf0ac8ed9e..d1bbde6539417 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -12805,66 +12805,6 @@ s_abs_i32 s0, 0x3f717273 s_abs_i32 s0, 0xaf123456 // GFX10: encoding: [0xff,0x34,0x80,0xbe,0x56,0x34,0x12,0xaf] -s_mov_fed_b32 s0, s1 -// GFX10: encoding: [0x01,0x35,0x80,0xbe] - -s_mov_fed_b32 s105, s104 -// GFX10: encoding: [0x68,0x35,0xe9,0xbe] - -s_mov_fed_b32 s0, s104 -// GFX10: encoding: [0x68,0x35,0x80,0xbe] - -s_mov_fed_b32 s105, s1 -// GFX10: encoding: [0x01,0x35,0xe9,0xbe] - -s_mov_fed_b32 exec_lo, s1 -// GFX10: encoding: [0x01,0x35,0xfe,0xbe] - -s_mov_fed_b32 exec_hi, s1 -// GFX10: encoding: [0x01,0x35,0xff,0xbe] - -s_mov_fed_b32 vcc_lo, s1 -// GFX10: encoding: [0x01,0x35,0xea,0xbe] - -s_mov_fed_b32 vcc_hi, s1 -// GFX10: encoding: [0x01,0x35,0xeb,0xbe] - -s_mov_fed_b32 m0, s1 -// GFX10: encoding: [0x01,0x35,0xfc,0xbe] - -s_mov_fed_b32 s0, exec_lo -// GFX10: encoding: [0x7e,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, exec_hi -// GFX10: encoding: [0x7f,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, vcc_lo -// GFX10: encoding: [0x6a,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, vcc_hi -// GFX10: encoding: [0x6b,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, m0 -// GFX10: encoding: [0x7c,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, 0 -// GFX10: encoding: [0x80,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, -1 -// GFX10: encoding: [0xc1,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, 0.5 -// GFX10: encoding: [0xf0,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, -4.0 -// GFX10: encoding: [0xf7,0x35,0x80,0xbe] - -s_mov_fed_b32 s0, 0x3f717273 -// GFX10: encoding: [0xff,0x35,0x80,0xbe,0x73,0x72,0x71,0x3f] - -s_mov_fed_b32 s0, 0xaf123456 -// GFX10: encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf] - s_andn1_saveexec_b64 s[0:1], s[2:3] // GFX10: encoding: [0x02,0x37,0x80,0xbe] @@ -22278,258 +22218,6 @@ v_cvt_i32_f32_dpp v5, v1 row_xmask:1 row_mask:0x0 bank_mask:0x0 
v_cvt_i32_f32_dpp v5, v1 row_xmask:15 row_mask:0x0 bank_mask:0x0 // GFX10: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x6f,0x01,0x00] -v_mov_fed_b32_e32 v5, v1 -// GFX10: encoding: [0x01,0x13,0x0a,0x7e] - -v_mov_fed_b32_e32 v255, v1 -// GFX10: encoding: [0x01,0x13,0xfe,0x7f] - -v_mov_fed_b32_e32 v5, v255 -// GFX10: encoding: [0xff,0x13,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, s1 -// GFX10: encoding: [0x01,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, s103 -// GFX10: encoding: [0x67,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, vcc_lo -// GFX10: encoding: [0x6a,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, vcc_hi -// GFX10: encoding: [0x6b,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, ttmp11 -// GFX10: encoding: [0x77,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, m0 -// GFX10: encoding: [0x7c,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, exec_lo -// GFX10: encoding: [0x7e,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, exec_hi -// GFX10: encoding: [0x7f,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, 0 -// GFX10: encoding: [0x80,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, -1 -// GFX10: encoding: [0xc1,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, 0.5 -// GFX10: encoding: [0xf0,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, -4.0 -// GFX10: encoding: [0xf7,0x12,0x0a,0x7e] - -v_mov_fed_b32_e32 v5, 0xaf123456 -// GFX10: encoding: [0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] - -v_mov_fed_b32_e32 v5, 0x3f717273 -// GFX10: encoding: [0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] - -v_mov_fed_b32_e64 v5, v1 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v255, v1 -// GFX10: encoding: [0xff,0x00,0x89,0xd5,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, v255 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0xff,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, s1 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x01,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, s101 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x65,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_lo -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x6a,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 
vcc_hi -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x6b,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, m0 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x7c,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_lo -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x7e,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_hi -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x7f,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0x80,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -1 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0xc1,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0.5 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0xf0,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -4.0 -// GFX10: encoding: [0x05,0x00,0x89,0xd5,0xf7,0x00,0x00,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, s101 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x65,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x6a,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, vcc_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x6b,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, m0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x7c,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, exec_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: 
[0xf9,0x12,0x0a,0x7e,0x7e,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, exec_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x7f,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 -// GFX10: encoding: 
[0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00] - -v_mov_fed_b32_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// GFX10: encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v5, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00] - -v_mov_fed_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 
row_shr:15 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00] - -v_mov_fed_b32_dpp v5, v1 row_share:1 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x51,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x5f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_xmask:1 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x61,0x01,0x00] - -v_mov_fed_b32_dpp 
v5, v1 row_xmask:15 row_mask:0x0 bank_mask:0x0 -// GFX10: encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x6f,0x01,0x00] - v_cvt_f16_f32 v5, v1 // GFX10: encoding: [0x01,0x15,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s index 70d779a047ba2..b8ede28ec0763 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -18,9 +18,6 @@ v_cvt_u32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] v_cvt_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_mov_fed_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] -// GFX10: encoding: [0xe9,0x12,0x0a,0x7e,0x01,0x88,0xc6,0xfa] - v_cvt_f16_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] @@ -273,9 +270,6 @@ v_cvt_u32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 v_cvt_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 // GFX10: encoding: [0xea,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_mov_fed_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 -// GFX10: encoding: [0xea,0x12,0x0a,0x7e,0x01,0x88,0xc6,0xfa] - v_cvt_f16_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 // GFX10: encoding: [0xea,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s index eff0cff4272da..251dde827b71c 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s @@ -130,9 +130,6 @@ s_set_gpr_idx_idx s0 s_cbranch_join s0 // GFX10: error: instruction not supported on this GPU -s_mov_regrd_b32 s0, s1 -// GFX10: error: instruction not supported on this GPU - //===----------------------------------------------------------------------===// // ENC_SOP2. 
//===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/gfx7_asm_all.s b/llvm/test/MC/AMDGPU/gfx7_asm_all.s index 434c64b72ff11..73247d17aed1a 100644 --- a/llvm/test/MC/AMDGPU/gfx7_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx7_asm_all.s @@ -14148,105 +14148,6 @@ s_abs_i32 s5, 0xaf123456 s_abs_i32 s5, 0x3f717273 // CHECK: [0xff,0x34,0x85,0xbe,0x73,0x72,0x71,0x3f] -s_mov_fed_b32 s5, s1 -// CHECK: [0x01,0x35,0x85,0xbe] - -s_mov_fed_b32 s103, s1 -// CHECK: [0x01,0x35,0xe7,0xbe] - -s_mov_fed_b32 flat_scratch_lo, s1 -// CHECK: [0x01,0x35,0xe8,0xbe] - -s_mov_fed_b32 flat_scratch_hi, s1 -// CHECK: [0x01,0x35,0xe9,0xbe] - -s_mov_fed_b32 vcc_lo, s1 -// CHECK: [0x01,0x35,0xea,0xbe] - -s_mov_fed_b32 vcc_hi, s1 -// CHECK: [0x01,0x35,0xeb,0xbe] - -s_mov_fed_b32 tba_lo, s1 -// CHECK: [0x01,0x35,0xec,0xbe] - -s_mov_fed_b32 tba_hi, s1 -// CHECK: [0x01,0x35,0xed,0xbe] - -s_mov_fed_b32 tma_lo, s1 -// CHECK: [0x01,0x35,0xee,0xbe] - -s_mov_fed_b32 tma_hi, s1 -// CHECK: [0x01,0x35,0xef,0xbe] - -s_mov_fed_b32 ttmp11, s1 -// CHECK: [0x01,0x35,0xfb,0xbe] - -s_mov_fed_b32 m0, s1 -// CHECK: [0x01,0x35,0xfc,0xbe] - -s_mov_fed_b32 exec_lo, s1 -// CHECK: [0x01,0x35,0xfe,0xbe] - -s_mov_fed_b32 exec_hi, s1 -// CHECK: [0x01,0x35,0xff,0xbe] - -s_mov_fed_b32 s5, s103 -// CHECK: [0x67,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, flat_scratch_lo -// CHECK: [0x68,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, flat_scratch_hi -// CHECK: [0x69,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, vcc_lo -// CHECK: [0x6a,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, vcc_hi -// CHECK: [0x6b,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, tba_lo -// CHECK: [0x6c,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, tba_hi -// CHECK: [0x6d,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, tma_lo -// CHECK: [0x6e,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, tma_hi -// CHECK: [0x6f,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, ttmp11 -// CHECK: [0x7b,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, m0 -// CHECK: [0x7c,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, exec_lo -// 
CHECK: [0x7e,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, exec_hi -// CHECK: [0x7f,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, 0 -// CHECK: [0x80,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, -1 -// CHECK: [0xc1,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, 0.5 -// CHECK: [0xf0,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, -4.0 -// CHECK: [0xf7,0x35,0x85,0xbe] - -s_mov_fed_b32 s5, 0xaf123456 -// CHECK: [0xff,0x35,0x85,0xbe,0x56,0x34,0x12,0xaf] - -s_mov_fed_b32 s5, 0x3f717273 -// CHECK: [0xff,0x35,0x85,0xbe,0x73,0x72,0x71,0x3f] - s_add_u32 s5, s1, s2 // CHECK: [0x01,0x02,0x05,0x80] @@ -24318,138 +24219,6 @@ v_cvt_i32_f32_e64 v5, -v1 v_cvt_i32_f32_e64 v5, |v1| // CHECK: [0x05,0x01,0x10,0xd3,0x01,0x01,0x00,0x00] -v_mov_fed_b32 v5, v1 -// CHECK: [0x01,0x13,0x0a,0x7e] - -v_mov_fed_b32 v255, v1 -// CHECK: [0x01,0x13,0xfe,0x7f] - -v_mov_fed_b32 v5, v255 -// CHECK: [0xff,0x13,0x0a,0x7e] - -v_mov_fed_b32 v5, s1 -// CHECK: [0x01,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, s103 -// CHECK: [0x67,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, flat_scratch_lo -// CHECK: [0x68,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, flat_scratch_hi -// CHECK: [0x69,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, vcc_lo -// CHECK: [0x6a,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, vcc_hi -// CHECK: [0x6b,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tba_lo -// CHECK: [0x6c,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tba_hi -// CHECK: [0x6d,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tma_lo -// CHECK: [0x6e,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tma_hi -// CHECK: [0x6f,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, ttmp11 -// CHECK: [0x7b,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, m0 -// CHECK: [0x7c,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, exec_lo -// CHECK: [0x7e,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, exec_hi -// CHECK: [0x7f,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0 -// CHECK: [0x80,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, -1 -// CHECK: [0xc1,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0.5 -// CHECK: [0xf0,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, -4.0 -// CHECK: [0xf7,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0xaf123456 -// CHECK: 
[0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] - -v_mov_fed_b32 v5, 0x3f717273 -// CHECK: [0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] - -v_mov_fed_b32_e64 v5, v1 -// CHECK: [0x05,0x00,0x12,0xd3,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v255, v1 -// CHECK: [0xff,0x00,0x12,0xd3,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, v255 -// CHECK: [0x05,0x00,0x12,0xd3,0xff,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, s1 -// CHECK: [0x05,0x00,0x12,0xd3,0x01,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, s103 -// CHECK: [0x05,0x00,0x12,0xd3,0x67,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, flat_scratch_lo -// CHECK: [0x05,0x00,0x12,0xd3,0x68,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, flat_scratch_hi -// CHECK: [0x05,0x00,0x12,0xd3,0x69,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_lo -// CHECK: [0x05,0x00,0x12,0xd3,0x6a,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_hi -// CHECK: [0x05,0x00,0x12,0xd3,0x6b,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tba_lo -// CHECK: [0x05,0x00,0x12,0xd3,0x6c,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tba_hi -// CHECK: [0x05,0x00,0x12,0xd3,0x6d,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tma_lo -// CHECK: [0x05,0x00,0x12,0xd3,0x6e,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tma_hi -// CHECK: [0x05,0x00,0x12,0xd3,0x6f,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, ttmp11 -// CHECK: [0x05,0x00,0x12,0xd3,0x7b,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, m0 -// CHECK: [0x05,0x00,0x12,0xd3,0x7c,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_lo -// CHECK: [0x05,0x00,0x12,0xd3,0x7e,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_hi -// CHECK: [0x05,0x00,0x12,0xd3,0x7f,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0 -// CHECK: [0x05,0x00,0x12,0xd3,0x80,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -1 -// CHECK: [0x05,0x00,0x12,0xd3,0xc1,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0.5 -// CHECK: [0x05,0x00,0x12,0xd3,0xf0,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -4.0 -// CHECK: [0x05,0x00,0x12,0xd3,0xf7,0x00,0x00,0x00] - v_cvt_f16_f32 v5, v1 // CHECK: [0x01,0x15,0x0a,0x7e] diff --git 
a/llvm/test/MC/AMDGPU/gfx8_asm_all.s b/llvm/test/MC/AMDGPU/gfx8_asm_all.s index 369faf09a6385..1610bfa7d92a9 100644 --- a/llvm/test/MC/AMDGPU/gfx8_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx8_asm_all.s @@ -14926,105 +14926,6 @@ s_abs_i32 s5, 0xaf123456 s_abs_i32 s5, 0x3f717273 // CHECK: [0xff,0x30,0x85,0xbe,0x73,0x72,0x71,0x3f] -s_mov_fed_b32 s5, s1 -// CHECK: [0x01,0x31,0x85,0xbe] - -s_mov_fed_b32 s101, s1 -// CHECK: [0x01,0x31,0xe5,0xbe] - -s_mov_fed_b32 flat_scratch_lo, s1 -// CHECK: [0x01,0x31,0xe6,0xbe] - -s_mov_fed_b32 flat_scratch_hi, s1 -// CHECK: [0x01,0x31,0xe7,0xbe] - -s_mov_fed_b32 vcc_lo, s1 -// CHECK: [0x01,0x31,0xea,0xbe] - -s_mov_fed_b32 vcc_hi, s1 -// CHECK: [0x01,0x31,0xeb,0xbe] - -s_mov_fed_b32 tba_lo, s1 -// CHECK: [0x01,0x31,0xec,0xbe] - -s_mov_fed_b32 tba_hi, s1 -// CHECK: [0x01,0x31,0xed,0xbe] - -s_mov_fed_b32 tma_lo, s1 -// CHECK: [0x01,0x31,0xee,0xbe] - -s_mov_fed_b32 tma_hi, s1 -// CHECK: [0x01,0x31,0xef,0xbe] - -s_mov_fed_b32 ttmp11, s1 -// CHECK: [0x01,0x31,0xfb,0xbe] - -s_mov_fed_b32 m0, s1 -// CHECK: [0x01,0x31,0xfc,0xbe] - -s_mov_fed_b32 exec_lo, s1 -// CHECK: [0x01,0x31,0xfe,0xbe] - -s_mov_fed_b32 exec_hi, s1 -// CHECK: [0x01,0x31,0xff,0xbe] - -s_mov_fed_b32 s5, s101 -// CHECK: [0x65,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, flat_scratch_lo -// CHECK: [0x66,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, flat_scratch_hi -// CHECK: [0x67,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, vcc_lo -// CHECK: [0x6a,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, vcc_hi -// CHECK: [0x6b,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, tba_lo -// CHECK: [0x6c,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, tba_hi -// CHECK: [0x6d,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, tma_lo -// CHECK: [0x6e,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, tma_hi -// CHECK: [0x6f,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, ttmp11 -// CHECK: [0x7b,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, m0 -// CHECK: [0x7c,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, exec_lo -// CHECK: [0x7e,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, exec_hi -// CHECK: [0x7f,0x31,0x85,0xbe] - 
-s_mov_fed_b32 s5, 0 -// CHECK: [0x80,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, -1 -// CHECK: [0xc1,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, 0.5 -// CHECK: [0xf0,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, -4.0 -// CHECK: [0xf7,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, 0xaf123456 -// CHECK: [0xff,0x31,0x85,0xbe,0x56,0x34,0x12,0xaf] - -s_mov_fed_b32 s5, 0x3f717273 -// CHECK: [0xff,0x31,0x85,0xbe,0x73,0x72,0x71,0x3f] - s_set_gpr_idx_idx s1 // CHECK: [0x01,0x32,0x80,0xbe] @@ -25642,138 +25543,6 @@ v_cvt_i32_f32_e64 v5, -v1 v_cvt_i32_f32_e64 v5, |v1| // CHECK: [0x05,0x01,0x48,0xd1,0x01,0x01,0x00,0x00] -v_mov_fed_b32 v5, v1 -// CHECK: [0x01,0x13,0x0a,0x7e] - -v_mov_fed_b32 v255, v1 -// CHECK: [0x01,0x13,0xfe,0x7f] - -v_mov_fed_b32 v5, v255 -// CHECK: [0xff,0x13,0x0a,0x7e] - -v_mov_fed_b32 v5, s1 -// CHECK: [0x01,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, s101 -// CHECK: [0x65,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, flat_scratch_lo -// CHECK: [0x66,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, flat_scratch_hi -// CHECK: [0x67,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, vcc_lo -// CHECK: [0x6a,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, vcc_hi -// CHECK: [0x6b,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tba_lo -// CHECK: [0x6c,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tba_hi -// CHECK: [0x6d,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tma_lo -// CHECK: [0x6e,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, tma_hi -// CHECK: [0x6f,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, ttmp11 -// CHECK: [0x7b,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, m0 -// CHECK: [0x7c,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, exec_lo -// CHECK: [0x7e,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, exec_hi -// CHECK: [0x7f,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0 -// CHECK: [0x80,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, -1 -// CHECK: [0xc1,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0.5 -// CHECK: [0xf0,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, -4.0 -// CHECK: [0xf7,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0xaf123456 -// CHECK: [0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] - -v_mov_fed_b32 v5, 0x3f717273 -// CHECK: 
[0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] - -v_mov_fed_b32_e64 v5, v1 -// CHECK: [0x05,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v255, v1 -// CHECK: [0xff,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, v255 -// CHECK: [0x05,0x00,0x49,0xd1,0xff,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, s1 -// CHECK: [0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, s101 -// CHECK: [0x05,0x00,0x49,0xd1,0x65,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, flat_scratch_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x66,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, flat_scratch_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x67,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x6a,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x6b,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tba_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x6c,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tba_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x6d,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tma_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x6e,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, tma_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x6f,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, ttmp11 -// CHECK: [0x05,0x00,0x49,0xd1,0x7b,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, m0 -// CHECK: [0x05,0x00,0x49,0xd1,0x7c,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x7e,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x7f,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0 -// CHECK: [0x05,0x00,0x49,0xd1,0x80,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -1 -// CHECK: [0x05,0x00,0x49,0xd1,0xc1,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0.5 -// CHECK: [0x05,0x00,0x49,0xd1,0xf0,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -4.0 -// CHECK: [0x05,0x00,0x49,0xd1,0xf7,0x00,0x00,0x00] - v_cvt_f16_f32 v5, v1 // CHECK: [0x01,0x15,0x0a,0x7e] @@ -95656,150 +95425,6 @@ v_cvt_i32_f32_dpp v5, -v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_cvt_i32_f32_dpp v5, |v1| 
quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // CHECK: [0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x20,0x00] -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00] - -v_mov_fed_b32_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v5, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00] - -v_mov_fed_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_bcast:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x42,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_bcast:31 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x43,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_shl:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x30,0x01,0x00] - 
-v_mov_fed_b32_dpp v5, v1 wave_rol:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x34,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_shr:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x38,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_ror:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x3c,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] 
row_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00] - v_cvt_f16_f32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // CHECK: [0xf9,0x14,0x0a,0x7e,0x01,0x06,0x06,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_all.s b/llvm/test/MC/AMDGPU/gfx9_asm_all.s index 70771291545f9..b3b8bf86a131b 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_all.s @@ -14273,75 +14273,6 @@ s_abs_i32 s5, 0xaf123456 s_abs_i32 s5, 0x3f717273 // CHECK: [0xff,0x30,0x85,0xbe,0x73,0x72,0x71,0x3f] -s_mov_fed_b32 s5, s1 -// CHECK: [0x01,0x31,0x85,0xbe] - -s_mov_fed_b32 s101, s1 -// CHECK: [0x01,0x31,0xe5,0xbe] - -s_mov_fed_b32 flat_scratch_lo, s1 -// CHECK: [0x01,0x31,0xe6,0xbe] - -s_mov_fed_b32 flat_scratch_hi, s1 -// CHECK: [0x01,0x31,0xe7,0xbe] - -s_mov_fed_b32 vcc_lo, s1 -// CHECK: [0x01,0x31,0xea,0xbe] - -s_mov_fed_b32 vcc_hi, s1 -// CHECK: [0x01,0x31,0xeb,0xbe] - -s_mov_fed_b32 m0, s1 -// CHECK: [0x01,0x31,0xfc,0xbe] - -s_mov_fed_b32 exec_lo, s1 -// CHECK: [0x01,0x31,0xfe,0xbe] - -s_mov_fed_b32 exec_hi, s1 -// CHECK: [0x01,0x31,0xff,0xbe] - -s_mov_fed_b32 s5, s101 -// CHECK: [0x65,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, flat_scratch_lo -// CHECK: [0x66,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, flat_scratch_hi -// CHECK: [0x67,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, vcc_lo -// CHECK: [0x6a,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, vcc_hi -// CHECK: [0x6b,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, m0 -// CHECK: [0x7c,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, exec_lo -// CHECK: [0x7e,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, exec_hi -// CHECK: [0x7f,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, 0 -// CHECK: [0x80,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, -1 -// CHECK: [0xc1,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, 0.5 -// CHECK: [0xf0,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, -4.0 -// CHECK: [0xf7,0x31,0x85,0xbe] - -s_mov_fed_b32 s5, 0xaf123456 -// 
CHECK: [0xff,0x31,0x85,0xbe,0x56,0x34,0x12,0xaf] - -s_mov_fed_b32 s5, 0x3f717273 -// CHECK: [0xff,0x31,0x85,0xbe,0x73,0x72,0x71,0x3f] - s_set_gpr_idx_idx s1 // CHECK: [0x01,0x32,0x80,0xbe] @@ -22541,108 +22472,6 @@ v_cvt_i32_f32_e64 v5, |v1| v_cvt_i32_f32_e64 v5, v1 clamp // CHECK: [0x05,0x80,0x48,0xd1,0x01,0x01,0x00,0x00] -v_mov_fed_b32 v5, v1 -// CHECK: [0x01,0x13,0x0a,0x7e] - -v_mov_fed_b32 v255, v1 -// CHECK: [0x01,0x13,0xfe,0x7f] - -v_mov_fed_b32 v5, v255 -// CHECK: [0xff,0x13,0x0a,0x7e] - -v_mov_fed_b32 v5, s1 -// CHECK: [0x01,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, s101 -// CHECK: [0x65,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, flat_scratch_lo -// CHECK: [0x66,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, flat_scratch_hi -// CHECK: [0x67,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, vcc_lo -// CHECK: [0x6a,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, vcc_hi -// CHECK: [0x6b,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, m0 -// CHECK: [0x7c,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, exec_lo -// CHECK: [0x7e,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, exec_hi -// CHECK: [0x7f,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0 -// CHECK: [0x80,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, -1 -// CHECK: [0xc1,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0.5 -// CHECK: [0xf0,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, -4.0 -// CHECK: [0xf7,0x12,0x0a,0x7e] - -v_mov_fed_b32 v5, 0xaf123456 -// CHECK: [0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] - -v_mov_fed_b32 v5, 0x3f717273 -// CHECK: [0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] - -v_mov_fed_b32_e64 v5, v1 -// CHECK: [0x05,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v255, v1 -// CHECK: [0xff,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, v255 -// CHECK: [0x05,0x00,0x49,0xd1,0xff,0x01,0x00,0x00] - -v_mov_fed_b32_e64 v5, s1 -// CHECK: [0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, s101 -// CHECK: [0x05,0x00,0x49,0xd1,0x65,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, flat_scratch_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x66,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 
flat_scratch_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x67,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x6a,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, vcc_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x6b,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, m0 -// CHECK: [0x05,0x00,0x49,0xd1,0x7c,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_lo -// CHECK: [0x05,0x00,0x49,0xd1,0x7e,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, exec_hi -// CHECK: [0x05,0x00,0x49,0xd1,0x7f,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0 -// CHECK: [0x05,0x00,0x49,0xd1,0x80,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -1 -// CHECK: [0x05,0x00,0x49,0xd1,0xc1,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, 0.5 -// CHECK: [0x05,0x00,0x49,0xd1,0xf0,0x00,0x00,0x00] - -v_mov_fed_b32_e64 v5, -4.0 -// CHECK: [0x05,0x00,0x49,0xd1,0xf7,0x00,0x00,0x00] - v_cvt_f16_f32 v5, v1 // CHECK: [0x01,0x15,0x0a,0x7e] @@ -84467,189 +84296,6 @@ v_cvt_i32_f32_dpp v5, -v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_cvt_i32_f32_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // CHECK: [0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x20,0x00] -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, s101 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x65,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x66,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, flat_scratch_hi dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x67,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x6a,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, vcc_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x6b,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, m0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x7c,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, exec_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x7e,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, exec_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x7f,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x80,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0xc1,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0xf0,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0xf7,0x06,0x86,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_0 
dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00] - -v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00] - -v_mov_fed_b32_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v5, v255 quad_perm:[0,1,2,3] row_mask:0x0 
bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00] - -v_mov_fed_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_bcast:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x42,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_bcast:31 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x43,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_shl:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x30,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_rol:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x34,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_shr:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x38,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 wave_ror:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x3c,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10] - 
-v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] bank_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] - -v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 -// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00] - v_cvt_f16_f32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // CHECK: [0xf9,0x14,0x0a,0x7e,0x01,0x06,0x06,0x00] diff --git a/llvm/test/MC/AMDGPU/lds_direct.s b/llvm/test/MC/AMDGPU/lds_direct.s index 6a879ee03d187..e677f59ad7484 100644 --- a/llvm/test/MC/AMDGPU/lds_direct.s +++ b/llvm/test/MC/AMDGPU/lds_direct.s @@ -16,12 +16,6 @@ v_cvt_f64_i32 v[0:1], src_lds_direct v_cvt_f64_i32_e64 v[0:1], src_lds_direct // GFX9: v_cvt_f64_i32_e64 v[0:1], src_lds_direct ; encoding: [0x00,0x00,0x44,0xd1,0xfe,0x00,0x00,0x00] -v_mov_fed_b32 v0, src_lds_direct -// GFX9: v_mov_fed_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x12,0x00,0x7e] - -v_mov_fed_b32_e64 v0, src_lds_direct -// GFX9: v_mov_fed_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x49,0xd1,0xfe,0x00,0x00,0x00] - v_fract_f32 v0, src_lds_direct // GFX9: v_fract_f32_e32 v0, src_lds_direct ; encoding: [0xfe,0x36,0x00,0x7e] diff --git a/llvm/test/MC/AMDGPU/mtbuf-gfx10.s b/llvm/test/MC/AMDGPU/mtbuf-gfx10.s index 
ea3f9df9bb5e7..40f082d02ebce 100644 --- a/llvm/test/MC/AMDGPU/mtbuf-gfx10.s +++ b/llvm/test/MC/AMDGPU/mtbuf-gfx10.s @@ -1,4 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s // GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb0,0xe8,0x00,0x00,0x20,0x80] tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0 @@ -66,3 +67,49 @@ tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen // GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen ; encoding: [0x00,0x20,0x0d,0xe9,0x02,0x00,0x00,0x80] tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen + +// GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:127, 0 idxen ; encoding: [0x00,0x20,0xfc,0xeb,0x01,0x00,0x00,0x80] +tbuffer_store_format_x v0, v1, s[0:3], format:127, 0 idxen + +// GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:127, 0 idxen ; encoding: [0x00,0x20,0xfc,0xeb,0x01,0x00,0x00,0x80] +tbuffer_store_format_x v0, v1, s[0:3] format:127 0 idxen + +// GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:0, s0 idxen ; encoding: [0x00,0x20,0x04,0xe8,0x01,0x00,0x00,0x00] +tbuffer_store_format_x v0, v1, s[0:3] format:0 s0 idxen + +// GFX10: tbuffer_store_format_x v0, v1, s[0:3], s0 idxen ; encoding: [0x00,0x20,0x0c,0xe8,0x01,0x00,0x00,0x00] +tbuffer_store_format_x v0, v1, s[0:3] format:1 s0 idxen + +// GFX10: tbuffer_store_format_x v0, v1, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x0c,0xe8,0x01,0x00,0x00,0x80] +tbuffer_store_format_x v0, v1, s[0:3], 0 idxen + +// GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0x08,0xe8,0x00,0x00,0x20,0x00] 
+tbuffer_load_format_d16_x v0, off, s[0:3] s0 + +//===----------------------------------------------------------------------===// +// Errors handling. +//===----------------------------------------------------------------------===// + +// GFX10-ERR: error: out of range format +tbuffer_load_format_d16_x v0, off, s[0:3], format:-1, 0 + +// GFX10-ERR: error: out of range format +tbuffer_load_format_d16_x v0, off, s[0:3], format:128, s0 + +// GFX10-ERR: error: too few operands for instruction +tbuffer_load_format_d16_x v0, off, s[0:3], format:127 + +// GFX10-ERR: error: too few operands for instruction +tbuffer_load_format_d16_x v0, off, s[0:3] + +// GFX10-ERR: error: invalid operand for instruction +tbuffer_load_format_d16_x v0, off, s[0:3] idxen + +// GFX10-ERR: error: unknown token in expression +tbuffer_load_format_d16_x v0, off, s[0:3], format:1,, s0 + +// GFX10-ERR: error: unknown token in expression +tbuffer_load_format_d16_x v0, off, s[0:3], format:1:, s0 + +// GFX10-ERR: error: not a valid operand +tbuffer_load_format_d16_x v0, off, s[0:3],, format:1, s0 diff --git a/llvm/test/MC/AMDGPU/mtbuf.s b/llvm/test/MC/AMDGPU/mtbuf.s index bfffb67bd7620..9d207ff326060 100644 --- a/llvm/test/MC/AMDGPU/mtbuf.s +++ b/llvm/test/MC/AMDGPU/mtbuf.s @@ -1,6 +1,10 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s -// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s -// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck 
-check-prefix=GCN -check-prefix=VI %s + +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN-ERR %s //===----------------------------------------------------------------------===// // Test for dfmt and nfmt (tbuffer only) @@ -45,11 +49,81 @@ tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, ttmp1 // dfmt is optional: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], nfmt:2, ttmp1 -// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, nfmt:2, ttmp1 ; encoding: [0x00,0x00,0x07,0xe9,0x00,0x01,0x1d,0x71] -// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x03,0xe9,0x00,0x01,0x1d,0x71] +// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:1, nfmt:2, ttmp1 ; encoding: [0x00,0x00,0x0f,0xe9,0x00,0x01,0x1d,0x71] +// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:1, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x0b,0xe9,0x00,0x01,0x1d,0x71] // nfmt and dfmt can be in either order: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], nfmt:2, dfmt:15, ttmp1 // SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x1d,0x71] // VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x1d,0x71] +// nfmt and dfmt may be omitted: +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 +// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x0f,0xe8,0x00,0x01,0x1d,0x71] +// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x80,0x0b,0xe8,0x00,0x01,0x1d,0x71] + +// Check dfmt/nfmt min values +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, 
nfmt:0, ttmp1 +// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, nfmt:0, ttmp1 ; encoding: [0x00,0x00,0x07,0xe8,0x00,0x01,0x1d,0x71] +// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, nfmt:0, ttmp1 ; encoding: [0x00,0x80,0x03,0xe8,0x00,0x01,0x1d,0x71] + +// Check dfmt/nfmt max values +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:7, ttmp1 +// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:7, ttmp1 ; encoding: [0x00,0x00,0xff,0xeb,0x00,0x01,0x1d,0x71] +// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:7, ttmp1 ; encoding: [0x00,0x80,0xfb,0xeb,0x00,0x01,0x1d,0x71] + +// Check default dfmt/nfmt values +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:1, nfmt:0, ttmp1 +// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x0f,0xe8,0x00,0x01,0x1d,0x71] +// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x80,0x0b,0xe8,0x00,0x01,0x1d,0x71] + +// Check that comma separators are optional +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:15 nfmt:7 ttmp1 +// SICI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:7, ttmp1 ; encoding: [0x00,0x00,0xff,0xeb,0x00,0x01,0x1d,0x71] +// VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:7, ttmp1 ; encoding: [0x00,0x80,0xfb,0xeb,0x00,0x01,0x1d,0x71] + +//===----------------------------------------------------------------------===// +// Errors handling. 
+//===----------------------------------------------------------------------===// + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:-1 nfmt:1 s0 +// GCN-ERR: error: out of range dfmt + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:16 nfmt:1 s0 +// GCN-ERR: error: out of range dfmt + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1 nfmt:-1 s0 +// GCN-ERR: error: out of range nfmt + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1 nfmt:8 s0 +// GCN-ERR: error: out of range nfmt + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] +// GCN-ERR: error: too few operands for instruction + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7],, dfmt:1 nfmt:1 s0 +// GCN-ERR: error: not a valid operand + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1,, nfmt:1 s0 +// GCN-ERR: error: unknown token in expression + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1 nfmt:1,, s0 +// GCN-ERR: error: unknown token in expression + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1 dfmt:1 s0 +// GCN-ERR: error: not a valid operand + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] nfmt:1 nfmt:1 s0 +// GCN-ERR: error: not a valid operand + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1 nfmt:1 dfmt:1 s0 +// GCN-ERR: error: not a valid operand + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] nfmt:1 dfmt:1 nfmt:1 s0 +// GCN-ERR: error: not a valid operand + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1: nfmt:1 s0 +// GCN-ERR: error: unknown token in expression + +tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7] dfmt:1 nfmt:1: s0 +// GCN-ERR: error: unknown token in expression diff --git a/llvm/test/MC/AMDGPU/sop1.s b/llvm/test/MC/AMDGPU/sop1.s index 97920ac70517f..76525b943cad1 100644 --- a/llvm/test/MC/AMDGPU/sop1.s +++ b/llvm/test/MC/AMDGPU/sop1.s @@ -259,9 +259,6 @@ s_abs_i32 s1, s2 // SICI: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe] // GFX89: s_abs_i32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] 
-s_mov_fed_b32 s1, s2 -// SICI: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe] - s_set_gpr_idx_idx s0 // GFX89: s_set_gpr_idx_idx s0 ; encoding: [0x00,0x32,0x80,0xbe] // NOSICI: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index 8ef640caa6154..e9d288418c42a 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -55,10 +55,6 @@ v_cvt_u32_f32_e32 v1, v2 // GCN: v_cvt_i32_f32_e32 v1, v2 ; encoding: [0x02,0x11,0x02,0x7e] v_cvt_i32_f32_e32 v1, v2 -// SICI: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e] -// VI: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e] -v_mov_fed_b32_e32 v1, v2 - // GCN: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e] v_cvt_f16_f32_e32 v1, v2 diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s index f7a0835f0a17e..b9532aebd1579 100644 --- a/llvm/test/MC/AMDGPU/wave32.s +++ b/llvm/test/MC/AMDGPU/wave32.s @@ -9,46 +9,46 @@ v_cmp_ge_i32_e32 s0, v0 v_cmp_ge_i32_e32 vcc_lo, s0, v1 // GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_cmp_ge_i32_e32 vcc, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d] v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD // GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:19: error: 
invalid operand for instruction // GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06] v_cmp_class_f32_e32 vcc_lo, s0, v0 // GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_cmp_class_f32_e32 vcc, s0, v0 -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] // TODO-GFX10: The following encoding does not match SP3's encoding, which is: // [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06] v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD // GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction // TODO-GFX10: The following encoding does not match SP3's encoding, which is: // [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06] v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06] v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD // GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD -// GFX1032-ERR: error: invalid operand 
for instruction +// GFX1032-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction // GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06] v_cndmask_b32_e32 v1, v2, v3, @@ -57,10 +57,10 @@ v_cndmask_b32_e32 v1, v2, v3, v_cndmask_b32_e32 v1, v2, v3, vcc_lo // GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_cndmask_b32_e32 v1, v2, v3, vcc -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] v_cndmask_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD @@ -69,10 +69,10 @@ v_cndmask_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD // GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] v_cndmask_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 @@ -81,26 +81,26 @@ v_cndmask_b32_dpp v5, v1, v2 
quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] v_add_co_u32_e32 v2, vcc_lo, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction v_add_co_u32_e32 v2, vcc, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo // GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] v_add_co_ci_u32_e32 v3, v3, v4 @@ 
-108,27 +108,27 @@ v_add_co_ci_u32_e32 v3, v3, v4 // GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] v_sub_co_u32_e32 v2, vcc_lo, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction v_sub_co_u32_e32 v2, vcc, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_subrev_co_u32_e32 v2, vcc_lo, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:33: error: invalid operand for instruction v_subrev_co_u32_e32 v2, vcc, s0, v2 -// GFX1032-ERR: error: instruction not supported on this GPU -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo // GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] v_sub_co_ci_u32_e32 v3, v3, v4 @@ -137,10 +137,10 @@ 
v_sub_co_ci_u32_e32 v3, v3, v4 v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo // GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] v_subrev_co_ci_u32_e32 v1, 0, v1 @@ -148,23 +148,23 @@ v_subrev_co_ci_u32_e32 v1, 0, v1 // GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] v_add_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: invalid operand -// GFX1064-ERR: error: invalid operand +// GFX1032-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction{{$}} +// GFX1064-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction{{$}} v_add_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: instruction not supported -// GFX1064-ERR: error: instruction not supported +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:30: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:30: error: not a valid operand.{{$}} v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -172,35 +172,35 @@ v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYT // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_sub_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: invalid operand -// GFX1064-ERR: error: invalid operand +// GFX1032-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction{{$}} +// GFX1064-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction{{$}} v_sub_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: instruction not supported -// GFX1064-ERR: error: instruction not supported +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:30: error: not a valid operand.{{$}} +// GFX1064-ERR: 
:[[@LINE-2]]:30: error: not a valid operand.{{$}} v_subrev_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: invalid operand -// GFX1064-ERR: error: invalid operand +// GFX1032-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction{{$}} +// GFX1064-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction{{$}} v_subrev_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: instruction not supported -// GFX1064-ERR: error: instruction not supported +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:33: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:33: error: not a valid operand.{{$}} v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -209,10 +209,10 @@ v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYT v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -225,23 +225,23 @@ v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_ v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_add_co_ci_u32_sdwa v1, vcc, 
sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:29: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:29: error: not a valid operand.{{$}} v_add_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:37: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:37: error: not a valid operand.{{$}} v_add_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:34: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:34: error: not a valid operand.{{$}} v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] @@ -249,170 +249,170 @@ v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported 
on this GPU // GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] v_sub_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:29: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:29: error: not a valid operand.{{$}} v_sub_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:37: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:37: error: not a valid operand.{{$}} v_sub_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:34: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:34: error: not a valid operand.{{$}} v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00] v_subrev_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a 
valid operand +// GFX1032-ERR: :[[@LINE-1]]:32: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:32: error: not a valid operand.{{$}} v_subrev_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:40: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:40: error: not a valid operand.{{$}} v_subrev_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: not a valid operand -// GFX1064-ERR: error: not a valid operand +// GFX1032-ERR: :[[@LINE-1]]:37: error: not a valid operand.{{$}} +// GFX1064-ERR: :[[@LINE-2]]:37: error: not a valid operand.{{$}} v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] -// GFX1064-ERR: error: instruction not supported on this GPU +// GFX1064-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1032-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] v_add_co_u32 v0, s0, v0, v2 // GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction v_add_co_u32_e64 v0, s0, v0, v2 // GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// 
GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 // GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction v_sub_co_u32 v0, s0, v0, v2 // GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction v_sub_co_u32_e64 v0, s0, v0, v2 // GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 // GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction v_subrev_co_u32 v0, s0, v0, v2 // GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction v_subrev_co_u32_e64 v0, s0, v0, v2 // GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:33: error: invalid operand for instruction v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 // GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:28: error: invalid operand for instruction v_add_co_u32 v0, s[0:1], v0, v2 -// GFX1032-ERR: error: invalid 
operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction // GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_u32_e64 v0, s[0:1], v0, v2 -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction // GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction // GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00] v_sub_co_u32 v0, s[0:1], v0, v2 -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction // GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_u32_e64 v0, s[0:1], v0, v2 -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction // GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction // GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00] v_subrev_co_u32 v0, s[0:1], v0, v2 -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction // GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_u32_e64 v0, s[0:1], v0, v2 -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for 
instruction // GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction // GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00] v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 // GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction // GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo // GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction // GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01] v_div_scale_f32 v2, s2, v0, v0, v2 // GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction v_div_scale_f32 v2, s[2:3], v0, v0, v2 -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction // GFX1064: 
v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04] v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] // GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction // GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04] v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3] // GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:23: error: invalid operand for instruction // GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04] v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3] // GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04] -// GFX1064-ERR: error: invalid operand for instruction +// GFX1064-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] -// GFX1032-ERR: error: invalid operand for instruction +// GFX1032-ERR: :[[@LINE-1]]:23: error: invalid operand for instruction // GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04] v_cmpx_neq_f32_e32 v0, v1 diff --git a/llvm/test/MC/ARM/neon-bitwise-encoding.s b/llvm/test/MC/ARM/neon-bitwise-encoding.s index d142dbabec9e9..7f3a5a02c9ead 100644 --- 
a/llvm/test/MC/ARM/neon-bitwise-encoding.s +++ b/llvm/test/MC/ARM/neon-bitwise-encoding.s @@ -101,10 +101,17 @@ vbsl d18, d17, d16 vbsl q8, q10, q9 + vbit d18, d17, d16 + vbit q8, q10, q9 + vbif d18, d17, d16 + vbif q8, q10, q9 @ CHECK: vbsl d18, d17, d16 @ encoding: [0xb0,0x21,0x51,0xf3] @ CHECK: vbsl q8, q10, q9 @ encoding: [0xf2,0x01,0x54,0xf3] - +@ CHECK: vbit d18, d17, d16 @ encoding: [0xb0,0x21,0x61,0xf3] +@ CHECK: vbit q8, q10, q9 @ encoding: [0xf2,0x01,0x64,0xf3] +@ CHECK: vbif d18, d17, d16 @ encoding: [0xb0,0x21,0x71,0xf3] +@ CHECK: vbif q8, q10, q9 @ encoding: [0xf2,0x01,0x74,0xf3] @ Size suffices are optional. veor q4, q7, q3 diff --git a/llvm/test/MC/ARM/neont2-bitwise-encoding.s b/llvm/test/MC/ARM/neont2-bitwise-encoding.s index 175873b69718c..f5c2a90f915a2 100644 --- a/llvm/test/MC/ARM/neont2-bitwise-encoding.s +++ b/llvm/test/MC/ARM/neont2-bitwise-encoding.s @@ -50,6 +50,14 @@ vbsl d18, d17, d16 vbsl q8, q10, q9 + vbit d18, d17, d16 + vbit q8, q10, q9 + vbif d18, d17, d16 + vbif q8, q10, q9 @ CHECK: vbsl d18, d17, d16 @ encoding: [0x51,0xff,0xb0,0x21] @ CHECK: vbsl q8, q10, q9 @ encoding: [0x54,0xff,0xf2,0x01] +@ CHECK: vbit d18, d17, d16 @ encoding: [0x61,0xff,0xb0,0x21] +@ CHECK: vbit q8, q10, q9 @ encoding: [0x64,0xff,0xf2,0x01] +@ CHECK: vbif d18, d17, d16 @ encoding: [0x71,0xff,0xb0,0x21] +@ CHECK: vbif q8, q10, q9 @ encoding: [0x74,0xff,0xf2,0x01] diff --git a/llvm/test/MC/AVR/hex-immediates.s b/llvm/test/MC/AVR/hex-immediates.s new file mode 100644 index 0000000000000..ca4c8b9f33551 --- /dev/null +++ b/llvm/test/MC/AVR/hex-immediates.s @@ -0,0 +1,7 @@ +; RUN: llvm-mc -filetype=obj -triple=avr %s -o %t +; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=DEC +; RUN: llvm-objdump -d --print-imm-hex %t | FileCheck %s --check-prefix=HEX + +; DEC: ldi r24, 66 +; HEX: ldi r24, 0x42 + ldi r24, 0x42 diff --git a/llvm/test/MC/AsmParser/layout-interdependency.s b/llvm/test/MC/AsmParser/layout-interdependency.s index 6e275e00d9ec7..6310610a718c1 
100644 --- a/llvm/test/MC/AsmParser/layout-interdependency.s +++ b/llvm/test/MC/AsmParser/layout-interdependency.s @@ -1,4 +1,5 @@ # RUN: not llvm-mc --filetype=obj %s -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: defaut_triple fct_end: diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt index 72477fecdb16a..fe9986c05ee00 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -15707,66 +15707,6 @@ # GFX10: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe] 0x02,0x04,0xea,0xbe -# GFX10: s_mov_fed_b32 exec_hi, s1 ; encoding: [0x01,0x35,0xff,0xbe] -0x01,0x35,0xff,0xbe - -# GFX10: s_mov_fed_b32 exec_lo, s1 ; encoding: [0x01,0x35,0xfe,0xbe] -0x01,0x35,0xfe,0xbe - -# GFX10: s_mov_fed_b32 m0, s1 ; encoding: [0x01,0x35,0xfc,0xbe] -0x01,0x35,0xfc,0xbe - -# GFX10: s_mov_fed_b32 s0, -1 ; encoding: [0xc1,0x35,0x80,0xbe] -0xc1,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, -4.0 ; encoding: [0xf7,0x35,0x80,0xbe] -0xf7,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, 0 ; encoding: [0x80,0x35,0x80,0xbe] -0x80,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, 0.5 ; encoding: [0xf0,0x35,0x80,0xbe] -0xf0,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, 0x3f717273 ; encoding: [0xff,0x35,0x80,0xbe,0x73,0x72,0x71,0x3f] -0xff,0x35,0x80,0xbe,0x73,0x72,0x71,0x3f - -# GFX10: s_mov_fed_b32 s0, 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf] -0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf - -# GFX10: s_mov_fed_b32 s0, exec_hi ; encoding: [0x7f,0x35,0x80,0xbe] -0x7f,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, exec_lo ; encoding: [0x7e,0x35,0x80,0xbe] -0x7e,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, m0 ; encoding: [0x7c,0x35,0x80,0xbe] -0x7c,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, s1 ; encoding: [0x01,0x35,0x80,0xbe] -0x01,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, s104 ; encoding: [0x68,0x35,0x80,0xbe] -0x68,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 
s0, vcc_hi ; encoding: [0x6b,0x35,0x80,0xbe] -0x6b,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s0, vcc_lo ; encoding: [0x6a,0x35,0x80,0xbe] -0x6a,0x35,0x80,0xbe - -# GFX10: s_mov_fed_b32 s105, s1 ; encoding: [0x01,0x35,0xe9,0xbe] -0x01,0x35,0xe9,0xbe - -# GFX10: s_mov_fed_b32 s105, s104 ; encoding: [0x68,0x35,0xe9,0xbe] -0x68,0x35,0xe9,0xbe - -# GFX10: s_mov_fed_b32 vcc_hi, s1 ; encoding: [0x01,0x35,0xeb,0xbe] -0x01,0x35,0xeb,0xbe - -# GFX10: s_mov_fed_b32 vcc_lo, s1 ; encoding: [0x01,0x35,0xea,0xbe] -0x01,0x35,0xea,0xbe - # GFX10: s_movk_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb0] 0x34,0x12,0x7f,0xb0 @@ -86004,243 +85944,6 @@ # GFX10: v_mov_b32_sdwa v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0x6a,0x06,0x86,0x00] 0xf9,0x02,0x0a,0x7e,0x6a,0x06,0x86,0x00 -# GFX10: v_mov_fed_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00] -0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; 
encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0 - -# GFX10: v_mov_fed_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_share:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x51,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x51,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x5f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x5f,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00 - 
-# GFX10: v_mov_fed_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_xmask:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x61,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x61,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v1 row_xmask:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x6f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x6f,0x01,0x00 - -# GFX10: v_mov_fed_b32_dpp v5, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00 - -# GFX10: v_mov_fed_b32_e32 v255, v1 ; encoding: [0x01,0x13,0xfe,0x7f] -0x01,0x13,0xfe,0x7f - -# GFX10: v_mov_fed_b32_e32 v5, -1 ; encoding: [0xc1,0x12,0x0a,0x7e] -0xc1,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, -4.0 ; encoding: [0xf7,0x12,0x0a,0x7e] -0xf7,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, 0 ; encoding: [0x80,0x12,0x0a,0x7e] -0x80,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, 0.5 ; encoding: [0xf0,0x12,0x0a,0x7e] -0xf0,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, 0x3f717273 ; encoding: [0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] -0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f - -# GFX10: v_mov_fed_b32_e32 v5, 0xaf123456 ; encoding: [0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] -0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf - -# GFX10: v_mov_fed_b32_e32 v5, exec_hi ; encoding: [0x7f,0x12,0x0a,0x7e] -0x7f,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, exec_lo ; encoding: [0x7e,0x12,0x0a,0x7e] -0x7e,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, m0 ; encoding: [0x7c,0x12,0x0a,0x7e] -0x7c,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, s1 ; encoding: [0x01,0x12,0x0a,0x7e] -0x01,0x12,0x0a,0x7e - -# GFX10: 
v_mov_fed_b32_e32 v5, s103 ; encoding: [0x67,0x12,0x0a,0x7e] -0x67,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, ttmp11 ; encoding: [0x77,0x12,0x0a,0x7e] -0x77,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, v1 ; encoding: [0x01,0x13,0x0a,0x7e] -0x01,0x13,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, v255 ; encoding: [0xff,0x13,0x0a,0x7e] -0xff,0x13,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x12,0x0a,0x7e] -0x6b,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x12,0x0a,0x7e] -0x6a,0x12,0x0a,0x7e - -# GFX10: v_mov_fed_b32_e64 v255, v1 ; encoding: [0xff,0x00,0x89,0xd5,0x01,0x01,0x00,0x00] -0xff,0x00,0x89,0xd5,0x01,0x01,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x89,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0xc1,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, -4.0 ; encoding: [0x05,0x00,0x89,0xd5,0xf7,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0xf7,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, 0 ; encoding: [0x05,0x00,0x89,0xd5,0x80,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x80,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x89,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0xf0,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x89,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x7f,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x89,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x7e,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x89,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x7c,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x89,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x01,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, s101 ; encoding: [0x05,0x00,0x89,0xd5,0x65,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x65,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x89,0xd5,0x01,0x01,0x00,0x00] 
-0x05,0x00,0x89,0xd5,0x01,0x01,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x89,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x89,0xd5,0xff,0x01,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x89,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x6b,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x89,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x89,0xd5,0x6a,0x00,0x00,0x00 - -# GFX10: v_mov_fed_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00] -0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, exec_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x7f,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x7f,0x06,0x86,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, exec_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x7e,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x7e,0x06,0x86,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, m0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x7c,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x7c,0x06,0x86,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x86,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, s101 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x65,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x65,0x06,0x86,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_1 
dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00 - -# GFX10: 
v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, vcc_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x6b,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x6b,0x06,0x86,0x00 - -# GFX10: v_mov_fed_b32_sdwa v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x6a,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x6a,0x06,0x86,0x00 - # GFX10: v_movreld_b32_e32 v255, v1 ; encoding: [0x01,0x85,0xfe,0x7f] 0x01,0x85,0xfe,0x7f diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt index afd0e63f3b645..f77ac1247b918 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp8.txt @@ -16,9 +16,6 @@ # GFX10: v_cvt_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] 0xe9,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa -# GFX10: v_mov_fed_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x12,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -0xe9,0x12,0x0a,0x7e,0x01,0x88,0xc6,0xfa - # GFX10: v_cvt_f16_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] 0xe9,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa @@ -268,9 +265,6 @@ # GFX10: v_cvt_i32_f32_dpp 
v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] 0xea,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa -# GFX10: v_mov_fed_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x12,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -0xea,0x12,0x0a,0x7e,0x01,0x88,0xc6,0xfa - # GFX10: v_cvt_f16_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] 0xea,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt index 3171355ed3d06..d790254a7ae1a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt @@ -14295,105 +14295,6 @@ # CHECK: s_abs_i32 s5, 0x3f717273 ; encoding: [0xff,0x30,0x85,0xbe,0x73,0x72,0x71,0x3f] 0xff,0x30,0x85,0xbe,0x73,0x72,0x71,0x3f -# CHECK: s_mov_fed_b32 s5, s1 ; encoding: [0x01,0x31,0x85,0xbe] -0x01,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s101, s1 ; encoding: [0x01,0x31,0xe5,0xbe] -0x01,0x31,0xe5,0xbe - -# CHECK: s_mov_fed_b32 flat_scratch_lo, s1 ; encoding: [0x01,0x31,0xe6,0xbe] -0x01,0x31,0xe6,0xbe - -# CHECK: s_mov_fed_b32 flat_scratch_hi, s1 ; encoding: [0x01,0x31,0xe7,0xbe] -0x01,0x31,0xe7,0xbe - -# CHECK: s_mov_fed_b32 vcc_lo, s1 ; encoding: [0x01,0x31,0xea,0xbe] -0x01,0x31,0xea,0xbe - -# CHECK: s_mov_fed_b32 vcc_hi, s1 ; encoding: [0x01,0x31,0xeb,0xbe] -0x01,0x31,0xeb,0xbe - -# CHECK: s_mov_fed_b32 tba_lo, s1 ; encoding: [0x01,0x31,0xec,0xbe] -0x01,0x31,0xec,0xbe - -# CHECK: s_mov_fed_b32 tba_hi, s1 ; encoding: [0x01,0x31,0xed,0xbe] -0x01,0x31,0xed,0xbe - -# CHECK: s_mov_fed_b32 tma_lo, s1 ; encoding: [0x01,0x31,0xee,0xbe] -0x01,0x31,0xee,0xbe - -# CHECK: s_mov_fed_b32 tma_hi, s1 ; encoding: [0x01,0x31,0xef,0xbe] -0x01,0x31,0xef,0xbe - -# CHECK: s_mov_fed_b32 ttmp11, s1 ; encoding: [0x01,0x31,0xfb,0xbe] -0x01,0x31,0xfb,0xbe - -# CHECK: s_mov_fed_b32 m0, s1 ; encoding: [0x01,0x31,0xfc,0xbe] -0x01,0x31,0xfc,0xbe - -# CHECK: 
s_mov_fed_b32 exec_lo, s1 ; encoding: [0x01,0x31,0xfe,0xbe] -0x01,0x31,0xfe,0xbe - -# CHECK: s_mov_fed_b32 exec_hi, s1 ; encoding: [0x01,0x31,0xff,0xbe] -0x01,0x31,0xff,0xbe - -# CHECK: s_mov_fed_b32 s5, s101 ; encoding: [0x65,0x31,0x85,0xbe] -0x65,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, flat_scratch_lo ; encoding: [0x66,0x31,0x85,0xbe] -0x66,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, flat_scratch_hi ; encoding: [0x67,0x31,0x85,0xbe] -0x67,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, vcc_lo ; encoding: [0x6a,0x31,0x85,0xbe] -0x6a,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, vcc_hi ; encoding: [0x6b,0x31,0x85,0xbe] -0x6b,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, tba_lo ; encoding: [0x6c,0x31,0x85,0xbe] -0x6c,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, tba_hi ; encoding: [0x6d,0x31,0x85,0xbe] -0x6d,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, tma_lo ; encoding: [0x6e,0x31,0x85,0xbe] -0x6e,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, tma_hi ; encoding: [0x6f,0x31,0x85,0xbe] -0x6f,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, ttmp11 ; encoding: [0x7b,0x31,0x85,0xbe] -0x7b,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, m0 ; encoding: [0x7c,0x31,0x85,0xbe] -0x7c,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, exec_lo ; encoding: [0x7e,0x31,0x85,0xbe] -0x7e,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, exec_hi ; encoding: [0x7f,0x31,0x85,0xbe] -0x7f,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, 0 ; encoding: [0x80,0x31,0x85,0xbe] -0x80,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, -1 ; encoding: [0xc1,0x31,0x85,0xbe] -0xc1,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, 0.5 ; encoding: [0xf0,0x31,0x85,0xbe] -0xf0,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, -4.0 ; encoding: [0xf7,0x31,0x85,0xbe] -0xf7,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, 0xaf123456 ; encoding: [0xff,0x31,0x85,0xbe,0x56,0x34,0x12,0xaf] -0xff,0x31,0x85,0xbe,0x56,0x34,0x12,0xaf - -# CHECK: s_mov_fed_b32 s5, 0x3f717273 ; encoding: [0xff,0x31,0x85,0xbe,0x73,0x72,0x71,0x3f] 
-0xff,0x31,0x85,0xbe,0x73,0x72,0x71,0x3f - # CHECK: s_set_gpr_idx_idx s1 ; encoding: [0x01,0x32,0x80,0xbe] 0x01,0x32,0x80,0xbe @@ -25011,138 +24912,6 @@ # CHECK: v_cvt_i32_f32_e64 v5, |v1| ; encoding: [0x05,0x01,0x48,0xd1,0x01,0x01,0x00,0x00] 0x05,0x01,0x48,0xd1,0x01,0x01,0x00,0x00 -# CHECK: v_mov_fed_b32_e32 v5, v1 ; encoding: [0x01,0x13,0x0a,0x7e] -0x01,0x13,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v255, v1 ; encoding: [0x01,0x13,0xfe,0x7f] -0x01,0x13,0xfe,0x7f - -# CHECK: v_mov_fed_b32_e32 v5, v255 ; encoding: [0xff,0x13,0x0a,0x7e] -0xff,0x13,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, s1 ; encoding: [0x01,0x12,0x0a,0x7e] -0x01,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, s101 ; encoding: [0x65,0x12,0x0a,0x7e] -0x65,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, flat_scratch_lo ; encoding: [0x66,0x12,0x0a,0x7e] -0x66,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, flat_scratch_hi ; encoding: [0x67,0x12,0x0a,0x7e] -0x67,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x12,0x0a,0x7e] -0x6a,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x12,0x0a,0x7e] -0x6b,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, tba_lo ; encoding: [0x6c,0x12,0x0a,0x7e] -0x6c,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, tba_hi ; encoding: [0x6d,0x12,0x0a,0x7e] -0x6d,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, tma_lo ; encoding: [0x6e,0x12,0x0a,0x7e] -0x6e,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, tma_hi ; encoding: [0x6f,0x12,0x0a,0x7e] -0x6f,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, ttmp11 ; encoding: [0x7b,0x12,0x0a,0x7e] -0x7b,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, m0 ; encoding: [0x7c,0x12,0x0a,0x7e] -0x7c,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, exec_lo ; encoding: [0x7e,0x12,0x0a,0x7e] -0x7e,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, exec_hi ; encoding: [0x7f,0x12,0x0a,0x7e] -0x7f,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, 0 ; encoding: [0x80,0x12,0x0a,0x7e] 
-0x80,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, -1 ; encoding: [0xc1,0x12,0x0a,0x7e] -0xc1,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, 0.5 ; encoding: [0xf0,0x12,0x0a,0x7e] -0xf0,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, -4.0 ; encoding: [0xf7,0x12,0x0a,0x7e] -0xf7,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, 0xaf123456 ; encoding: [0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] -0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf - -# CHECK: v_mov_fed_b32_e32 v5, 0x3f717273 ; encoding: [0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] -0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f - -# CHECK: v_mov_fed_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] -0x05,0x00,0x49,0xd1,0x01,0x01,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v255, v1 ; encoding: [0xff,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] -0xff,0x00,0x49,0xd1,0x01,0x01,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x49,0xd1,0xff,0x01,0x00,0x00] -0x05,0x00,0x49,0xd1,0xff,0x01,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, s101 ; encoding: [0x05,0x00,0x49,0xd1,0x65,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x65,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, flat_scratch_lo ; encoding: [0x05,0x00,0x49,0xd1,0x66,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x66,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, flat_scratch_hi ; encoding: [0x05,0x00,0x49,0xd1,0x67,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x67,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x49,0xd1,0x6a,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6a,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x49,0xd1,0x6b,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6b,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, tba_lo ; encoding: [0x05,0x00,0x49,0xd1,0x6c,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6c,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, tba_hi ; encoding: 
[0x05,0x00,0x49,0xd1,0x6d,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6d,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, tma_lo ; encoding: [0x05,0x00,0x49,0xd1,0x6e,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6e,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, tma_hi ; encoding: [0x05,0x00,0x49,0xd1,0x6f,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6f,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, ttmp11 ; encoding: [0x05,0x00,0x49,0xd1,0x7b,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x7b,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x49,0xd1,0x7c,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x7c,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x49,0xd1,0x7e,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x7e,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x49,0xd1,0x7f,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x7f,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, 0 ; encoding: [0x05,0x00,0x49,0xd1,0x80,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x80,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x49,0xd1,0xc1,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0xc1,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x49,0xd1,0xf0,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0xf0,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, -4.0 ; encoding: [0x05,0x00,0x49,0xd1,0xf7,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0xf7,0x00,0x00,0x00 - # CHECK: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e] 0x01,0x15,0x0a,0x7e @@ -94950,135 +94719,6 @@ # CHECK: v_cvt_i32_f32_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x20,0x00] 0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x20,0x00 -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; 
encoding: [0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00] -0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00] -0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 
row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_bcast:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x42,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x42,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_bcast:31 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x43,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x43,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x30,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x30,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_rol:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x34,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x34,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x38,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x38,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x3c,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x3c,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00] 
-0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00 - # CHECK: v_cvt_f16_f32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x14,0x0a,0x7e,0x01,0x06,0x06,0x00] 0xf9,0x14,0x0a,0x7e,0x01,0x06,0x06,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt index d4552dceb7778..c60d4850279ef 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -13383,75 +13383,6 @@ # CHECK: s_abs_i32 s5, 0x3f717273 ; 
encoding: [0xff,0x30,0x85,0xbe,0x73,0x72,0x71,0x3f] 0xff,0x30,0x85,0xbe,0x73,0x72,0x71,0x3f -# CHECK: s_mov_fed_b32 s5, s1 ; encoding: [0x01,0x31,0x85,0xbe] -0x01,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s101, s1 ; encoding: [0x01,0x31,0xe5,0xbe] -0x01,0x31,0xe5,0xbe - -# CHECK: s_mov_fed_b32 flat_scratch_lo, s1 ; encoding: [0x01,0x31,0xe6,0xbe] -0x01,0x31,0xe6,0xbe - -# CHECK: s_mov_fed_b32 flat_scratch_hi, s1 ; encoding: [0x01,0x31,0xe7,0xbe] -0x01,0x31,0xe7,0xbe - -# CHECK: s_mov_fed_b32 vcc_lo, s1 ; encoding: [0x01,0x31,0xea,0xbe] -0x01,0x31,0xea,0xbe - -# CHECK: s_mov_fed_b32 vcc_hi, s1 ; encoding: [0x01,0x31,0xeb,0xbe] -0x01,0x31,0xeb,0xbe - -# CHECK: s_mov_fed_b32 m0, s1 ; encoding: [0x01,0x31,0xfc,0xbe] -0x01,0x31,0xfc,0xbe - -# CHECK: s_mov_fed_b32 exec_lo, s1 ; encoding: [0x01,0x31,0xfe,0xbe] -0x01,0x31,0xfe,0xbe - -# CHECK: s_mov_fed_b32 exec_hi, s1 ; encoding: [0x01,0x31,0xff,0xbe] -0x01,0x31,0xff,0xbe - -# CHECK: s_mov_fed_b32 s5, s101 ; encoding: [0x65,0x31,0x85,0xbe] -0x65,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, flat_scratch_lo ; encoding: [0x66,0x31,0x85,0xbe] -0x66,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, flat_scratch_hi ; encoding: [0x67,0x31,0x85,0xbe] -0x67,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, vcc_lo ; encoding: [0x6a,0x31,0x85,0xbe] -0x6a,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, vcc_hi ; encoding: [0x6b,0x31,0x85,0xbe] -0x6b,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, m0 ; encoding: [0x7c,0x31,0x85,0xbe] -0x7c,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, exec_lo ; encoding: [0x7e,0x31,0x85,0xbe] -0x7e,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, exec_hi ; encoding: [0x7f,0x31,0x85,0xbe] -0x7f,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, 0 ; encoding: [0x80,0x31,0x85,0xbe] -0x80,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, -1 ; encoding: [0xc1,0x31,0x85,0xbe] -0xc1,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, 0.5 ; encoding: [0xf0,0x31,0x85,0xbe] -0xf0,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, -4.0 ; encoding: 
[0xf7,0x31,0x85,0xbe] -0xf7,0x31,0x85,0xbe - -# CHECK: s_mov_fed_b32 s5, 0xaf123456 ; encoding: [0xff,0x31,0x85,0xbe,0x56,0x34,0x12,0xaf] -0xff,0x31,0x85,0xbe,0x56,0x34,0x12,0xaf - -# CHECK: s_mov_fed_b32 s5, 0x3f717273 ; encoding: [0xff,0x31,0x85,0xbe,0x73,0x72,0x71,0x3f] -0xff,0x31,0x85,0xbe,0x73,0x72,0x71,0x3f - # CHECK: s_set_gpr_idx_idx s1 ; encoding: [0x01,0x32,0x80,0xbe] 0x01,0x32,0x80,0xbe @@ -21651,108 +21582,6 @@ # CHECK: v_cvt_i32_f32_e64 v5, v1 clamp ; encoding: [0x05,0x80,0x48,0xd1,0x01,0x01,0x00,0x00] 0x05,0x80,0x48,0xd1,0x01,0x01,0x00,0x00 -# CHECK: v_mov_fed_b32_e32 v5, v1 ; encoding: [0x01,0x13,0x0a,0x7e] -0x01,0x13,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v255, v1 ; encoding: [0x01,0x13,0xfe,0x7f] -0x01,0x13,0xfe,0x7f - -# CHECK: v_mov_fed_b32_e32 v5, v255 ; encoding: [0xff,0x13,0x0a,0x7e] -0xff,0x13,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, s1 ; encoding: [0x01,0x12,0x0a,0x7e] -0x01,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, s101 ; encoding: [0x65,0x12,0x0a,0x7e] -0x65,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, flat_scratch_lo ; encoding: [0x66,0x12,0x0a,0x7e] -0x66,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, flat_scratch_hi ; encoding: [0x67,0x12,0x0a,0x7e] -0x67,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x12,0x0a,0x7e] -0x6a,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x12,0x0a,0x7e] -0x6b,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, m0 ; encoding: [0x7c,0x12,0x0a,0x7e] -0x7c,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, exec_lo ; encoding: [0x7e,0x12,0x0a,0x7e] -0x7e,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, exec_hi ; encoding: [0x7f,0x12,0x0a,0x7e] -0x7f,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, 0 ; encoding: [0x80,0x12,0x0a,0x7e] -0x80,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, -1 ; encoding: [0xc1,0x12,0x0a,0x7e] -0xc1,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, 0.5 ; encoding: [0xf0,0x12,0x0a,0x7e] -0xf0,0x12,0x0a,0x7e - -# 
CHECK: v_mov_fed_b32_e32 v5, -4.0 ; encoding: [0xf7,0x12,0x0a,0x7e] -0xf7,0x12,0x0a,0x7e - -# CHECK: v_mov_fed_b32_e32 v5, 0xaf123456 ; encoding: [0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf] -0xff,0x12,0x0a,0x7e,0x56,0x34,0x12,0xaf - -# CHECK: v_mov_fed_b32_e32 v5, 0x3f717273 ; encoding: [0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f] -0xff,0x12,0x0a,0x7e,0x73,0x72,0x71,0x3f - -# CHECK: v_mov_fed_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] -0x05,0x00,0x49,0xd1,0x01,0x01,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v255, v1 ; encoding: [0xff,0x00,0x49,0xd1,0x01,0x01,0x00,0x00] -0xff,0x00,0x49,0xd1,0x01,0x01,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x49,0xd1,0xff,0x01,0x00,0x00] -0x05,0x00,0x49,0xd1,0xff,0x01,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, s101 ; encoding: [0x05,0x00,0x49,0xd1,0x65,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x65,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, flat_scratch_lo ; encoding: [0x05,0x00,0x49,0xd1,0x66,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x66,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, flat_scratch_hi ; encoding: [0x05,0x00,0x49,0xd1,0x67,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x67,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x49,0xd1,0x6a,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6a,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x49,0xd1,0x6b,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x6b,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x49,0xd1,0x7c,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x7c,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x49,0xd1,0x7e,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x7e,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x49,0xd1,0x7f,0x00,0x00,0x00] 
-0x05,0x00,0x49,0xd1,0x7f,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, 0 ; encoding: [0x05,0x00,0x49,0xd1,0x80,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0x80,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x49,0xd1,0xc1,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0xc1,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x49,0xd1,0xf0,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0xf0,0x00,0x00,0x00 - -# CHECK: v_mov_fed_b32_e64 v5, -4.0 ; encoding: [0x05,0x00,0x49,0xd1,0xf7,0x00,0x00,0x00] -0x05,0x00,0x49,0xd1,0xf7,0x00,0x00,0x00 - # CHECK: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e] 0x01,0x15,0x0a,0x7e @@ -83355,174 +83184,6 @@ # CHECK: v_cvt_i32_f32_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x20,0x00] 0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x20,0x00 -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v255, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00] -0xf9,0x12,0xfe,0x7f,0x01,0x06,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0xff,0x06,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, s101 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x65,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x65,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x66,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x66,0x06,0x86,0x00 - -# CHECK: 
v_mov_fed_b32_sdwa v5, flat_scratch_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x67,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x67,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x6a,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x6a,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, vcc_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x6b,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x6b,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, m0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x7c,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x7c,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, exec_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x7e,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x7e,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, exec_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x7f,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x7f,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x80,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0x80,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0xc1,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0xc1,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0xf0,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0xf0,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0xf7,0x06,0x86,0x00] -0xf9,0x12,0x0a,0x7e,0xf7,0x06,0x86,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: 
[0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x00,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x01,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x02,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x03,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x04,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x05,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x0e,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x16,0x06,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x00,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x01,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x02,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_3 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x03,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x04,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x05,0x00 - -# CHECK: v_mov_fed_b32_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00] -0xf9,0x12,0x0a,0x7e,0x01,0x06,0x0e,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00] -0xfa,0x12,0xfe,0x7f,0x01,0xe4,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0xff,0xe4,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x1b,0x00,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x40,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x41,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_bcast:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x42,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x42,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_bcast:31 row_mask:0x0 
bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x43,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x43,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x30,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x30,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_rol:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x34,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x34,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x38,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x38,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 wave_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x3c,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x3c,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x01,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x0f,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x11,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x1f,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x21,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0x2f,0x01,0x00 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x10 - -# CHECK: 
v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x30 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0xf0 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x01 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x03 - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x0f - -# CHECK: v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00] -0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x08,0x00 - # CHECK: v_cvt_f16_f32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x14,0x0a,0x7e,0x01,0x06,0x06,0x00] 0xf9,0x14,0x0a,0x7e,0x01,0x06,0x06,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt index 86fcdfc3d6b17..10d92d9990dc0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt @@ -67,3 +67,20 @@ # GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen 0x00,0x20,0x0d,0xe9,0x02,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:127, 0 idxen ; encoding: [0x00,0x20,0xfc,0xeb,0x01,0x00,0x00,0x80] +0x00,0x20,0xfc,0xeb,0x01,0x00,0x00,0x80 + +# GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:0, 0 idxen ; encoding: [0x00,0x20,0x04,0xe8,0x01,0x00,0x00,0x80] +0x00,0x20,0x04,0xe8,0x01,0x00,0x00,0x80 + +# GFX10: 
tbuffer_load_format_d16_x v0, off, s[0:3], format:0, s0 ; encoding: [0x00,0x00,0x00,0xe8,0x00,0x00,0x20,0x00] +0x00,0x00,0x00,0xe8,0x00,0x00,0x20,0x00 + +# GFX10: tbuffer_store_format_x v0, v1, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x0c,0xe8,0x01,0x00,0x00,0x80] +0x00,0x20,0x0c,0xe8,0x01,0x00,0x00,0x80 + +# GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0x08,0xe8,0x00,0x00,0x20,0x00] +0x00,0x00,0x08,0xe8,0x00,0x00,0x20,0x00 + +# GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:2, s0 idxen ; encoding: [0x00,0x20,0x14,0xe8,0x01,0x00,0x00,0x00] +0x00,0x20,0x14,0xe8,0x01,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/mtbuf_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/mtbuf_vi.txt index 519e03ede69eb..35f9d3bfd18f4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/mtbuf_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mtbuf_vi.txt @@ -20,3 +20,21 @@ # VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x01,0x1d,0x71] 0x00 0x80 0x7b 0xe9 0x00 0x01 0x1d 0x71 + +# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, nfmt:2, ttmp1 ; encoding: [0x00,0x80,0x03,0xe9,0x00,0x01,0x1d,0x71] +0x00,0x80,0x03,0xe9,0x00,0x01,0x1d,0x71 + +# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:15, nfmt:0, ttmp1 ; encoding: [0x00,0x80,0x7b,0xe8,0x00,0x01,0x1d,0x71] +0x00,0x80,0x7b,0xe8,0x00,0x01,0x1d,0x71 + +# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x80,0x0b,0xe8,0x00,0x01,0x1d,0x71] +0x00,0x80,0x0b,0xe8,0x00,0x01,0x1d,0x71 + +# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:0, nfmt:0, ttmp1 ; encoding: [0x00,0x80,0x03,0xe8,0x00,0x01,0x1d,0x71] +0x00,0x80,0x03,0xe8,0x00,0x01,0x1d,0x71 + +# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], dfmt:1, nfmt:1, ttmp1 ; encoding: [0x00,0x80,0x8b,0xe8,0x00,0x01,0x1d,0x71] +0x00,0x80,0x8b,0xe8,0x00,0x01,0x1d,0x71 + +# VI: tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], 
dfmt:15, nfmt:7, ttmp1 ; encoding: [0x00,0x80,0xfb,0xeb,0x00,0x01,0x1d,0x71] +0x00,0x80,0xfb,0xeb,0x00,0x01,0x1d,0x71 diff --git a/llvm/test/MC/Disassembler/ARM/neon-tests.txt b/llvm/test/MC/Disassembler/ARM/neon-tests.txt index 65e9954ac68b5..515697b74705b 100644 --- a/llvm/test/MC/Disassembler/ARM/neon-tests.txt +++ b/llvm/test/MC/Disassembler/ARM/neon-tests.txt @@ -3,6 +3,9 @@ # CHECK: vbif q15, q7, q0 0x50 0xe1 0x7e 0xf3 +# CHECK: vbit q15, q7, q0 +0x50 0xe1 0x6e 0xf3 + # CHECK: vcvt.f32.s32 q15, q0, #1 0x50 0xee 0xff 0xf2 diff --git a/llvm/test/MC/Disassembler/ARM/neon.txt b/llvm/test/MC/Disassembler/ARM/neon.txt index cd5f418b56c0b..134a535452870 100644 --- a/llvm/test/MC/Disassembler/ARM/neon.txt +++ b/llvm/test/MC/Disassembler/ARM/neon.txt @@ -326,6 +326,15 @@ 0xf2 0x01 0x54 0xf3 # CHECK: vbsl q8, q10, q9 +0xb0 0x21 0x61 0xf3 +# CHECK: vbit d18, d17, d16 +0xf2 0x01 0x64 0xf3 +# CHECK: vbit q8, q10, q9 + +0xb0 0x21 0x71 0xf3 +# CHECK: vbif d18, d17, d16 +0xf2 0x01 0x74 0xf3 +# CHECK: vbif q8, q10, q9 # CHECK: vceq.i8 d16, d16, d17 # CHECK: vceq.i16 d16, d16, d17 diff --git a/llvm/test/MC/Disassembler/ARM/neont2.txt b/llvm/test/MC/Disassembler/ARM/neont2.txt index 536095f6a37b3..fb80967af1aa0 100644 --- a/llvm/test/MC/Disassembler/ARM/neont2.txt +++ b/llvm/test/MC/Disassembler/ARM/neont2.txt @@ -320,6 +320,16 @@ 0x54 0xff 0xf2 0x01 # CHECK: vbsl q8, q10, q9 +0x61 0xff 0xb0 0x21 +# CHECK: vbit d18, d17, d16 +0x64 0xff 0xf2 0x01 +# CHECK: vbit q8, q10, q9 + +0x71 0xff 0xb0 0x21 +# CHECK: vbif d18, d17, d16 +0x74 0xff 0xf2 0x01 +# CHECK: vbif q8, q10, q9 + 0xfb 0xff 0x20 0x07 # CHECK: vcvt.s32.f32 d16, d16 0xfb 0xff 0xa0 0x07 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt index f8d310fa7e147..15ea5e9f1138e 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt @@ -302,3 +302,85 @@ # CHECK: stxvrdx 
35, 3, 1 0x7c 0x63 0x09 0xdb + +# CHECK: vmulesd 1, 2, 3 +0x10 0x22 0x1b 0xc8 + +# CHECK: vmulosd 1, 2, 3 +0x10 0x22 0x19 0xc8 + +# CHECK: vmuleud 1, 2, 3 +0x10 0x22 0x1a 0xc8 + +# CHECK: vmuloud 1, 2, 3 +0x10 0x22 0x18 0xc8 + +# CHECK: vmsumcud 1, 2, 3, 4 +0x10 0x22 0x19 0x17 + +# CHECK: vdivsq 3, 4, 5 +0x10 0x64 0x29 0x0b + +# CHECK: vdivuq 3, 4, 5 +0x10 0x64 0x28 0x0b + +# CHECK: vdivesq 3, 4, 5 +0x10 0x64 0x2b 0x0b + +# CHECK: vdiveuq 3, 4, 5 +0x10 0x64 0x2a 0x0b + +# CHECK: vcmpequq 4, 5, 6 +0x10 0x85 0x31 0xc7 + +# CHECK: vcmpequq. 4, 5, 6 +0x10 0x85 0x35 0xc7 + +# CHECK: vcmpgtsq 4, 5, 6 +0x10 0x85 0x33 0x87 + +# CHECK: vcmpgtsq. 4, 5, 6 +0x10 0x85 0x37 0x87 + +# CHECK: vcmpgtuq 4, 5, 6 +0x10 0x85 0x32 0x87 + +# CHECK: vcmpgtuq. 4, 5, 6 +0x10 0x85 0x36 0x87 + +# CHECK: vmoduq 3, 4, 5 +0x10 0x64 0x2e 0x0b + +# CHECK: vextsd2q 20, 25 +0x12 0x9b 0xce 0x02 + +# CHECK: vrlq 4, 5, 6 +0x10 0x85 0x30 0x05 + +# CHECK: vrlqnm 4, 5, 6 +0x10 0x85 0x31 0x45 + +# CHECK: vrlqmi 4, 5, 6 +0x10 0x85 0x30 0x45 + +# CHECK: vslq 4, 5, 6 +0x10 0x85 0x31 0x05 + +# CHECK: vsrq 4, 5, 6 +0x10 0x85 0x32 0x05 + +# CHECK: vsraq 4, 5, 6 +0x10 0x85 0x33 0x05 + +# CHECK: xscvqpuqz 8, 28 +0xfd 0x00 0xe6 0x88 + +# CHECK: xscvqpsqz 8, 28 +0xfd 0x08 0xe6 0x88 + +# CHECK: xscvuqqp 8, 28 +0xfd 0x03 0xe6 0x88 + +# CHECK: xscvsqqp 8, 28 +0xfd 0xb 0xe6 0x88 + diff --git a/llvm/test/MC/ELF/reloc-directive.s b/llvm/test/MC/ELF/reloc-directive.s new file mode 100644 index 0000000000000..59d7ace40d7e0 --- /dev/null +++ b/llvm/test/MC/ELF/reloc-directive.s @@ -0,0 +1,61 @@ +## Target specific relocation support is tested in MC/$target/*reloc-directive*.s +# RUN: llvm-mc -triple=x86_64 %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-readobj -r %t | FileCheck %s + +# ASM: .Ltmp0: +# ASM-NEXT: .reloc (.Ltmp0+3)-2, R_X86_64_NONE, foo +# ASM-NEXT: .Ltmp1: +# ASM-NEXT: .reloc .Ltmp1-1, R_X86_64_NONE, foo +# ASM-NEXT: .Ltmp2: +# ASM-NEXT: .reloc 2+.Ltmp2, 
R_X86_64_NONE, foo +# ASM-NEXT: .reloc (1+foo)+3, R_X86_64_NONE, data+1 + +# CHECK: 0x2 R_X86_64_NONE foo 0x0 +# CHECK-NEXT: 0x0 R_X86_64_NONE foo 0x0 +# CHECK-NEXT: 0x3 R_X86_64_NONE foo 0x0 +# CHECK-NEXT: 0x4 R_X86_64_NONE data 0x1 + +.text +.globl foo +foo: + ret + .reloc .+3-2, R_X86_64_NONE, foo + .reloc .-1, R_X86_64_NONE, foo + .reloc 2+., R_X86_64_NONE, foo + .reloc 1+foo+3, R_X86_64_NONE, data+1 + +.data +.globl data +data: + .long 0 + +# RUN: not llvm-mc -filetype=obj -triple=x86_64 --defsym=ERR=1 %s 2>&1 | FileCheck %s --check-prefix=ERR + +.ifdef ERR +.text +.globl a, b +a: ret +b: ret +x: ret +y: ret + +# ERR: {{.*}}.s:[[#@LINE+1]]:10: error: expected comma +.reloc 0 R_X86_64_NONE, a + +# ERR: {{.*}}.s:[[#@LINE+1]]:8: error: .reloc offset is negative +.reloc -1, R_X86_64_NONE, a +# ERR: {{.*}}.s:[[#@LINE+1]]:8: error: .reloc offset is not relocatable +.reloc 2*., R_X86_64_NONE, a +# ERR: {{.*}}.s:[[#@LINE+1]]:8: error: .reloc offset is not relocatable +.reloc a+a, R_X86_64_NONE, a +## GNU as accepts a-a but rejects b-a. +# ERR: {{.*}}.s:[[#@LINE+1]]:8: error: .reloc offset is not representable +.reloc a-a, R_X86_64_NONE, a +## TODO GNU as accepts x-x and y-x. 
+# ERR: {{.*}}.s:[[#@LINE+1]]:8: error: .reloc offset is not representable +.reloc x-x, R_X86_64_NONE, a + +# ERR: {{.*}}.s:[[#@LINE+1]]:8: error: directional label undefined +.reloc 1f, R_X86_64_NONE, a +.endif diff --git a/llvm/test/MC/Mips/eh-frame.s b/llvm/test/MC/Mips/eh-frame.s index 5be0d709a896c..024b9e6ac4889 100644 --- a/llvm/test/MC/Mips/eh-frame.s +++ b/llvm/test/MC/Mips/eh-frame.s @@ -68,7 +68,8 @@ func: // DWARF32-EMPTY: // DWARF32-NEXT: DW_CFA_def_cfa_register: reg29 // -// DWARF32: 00000014 00000010 00000018 FDE cie=00000000 pc=00000000...00000000 +// DWARF32_ABS: 00000014 00000010 00000018 FDE cie=00000000 pc=00000000...00000000 +// DWARF32_PIC: 00000014 00000010 00000018 FDE cie=00000000 pc=0000001c...0000001c // DWARF32-NEXT: Format: DWARF32 // DWARF32-NEXT: DW_CFA_nop: // DWARF32-NEXT: DW_CFA_nop: diff --git a/llvm/test/MC/Mips/reloc-directive-bad.s b/llvm/test/MC/Mips/reloc-directive-bad.s index 929643b914afa..bb056b752fb9f 100644 --- a/llvm/test/MC/Mips/reloc-directive-bad.s +++ b/llvm/test/MC/Mips/reloc-directive-bad.s @@ -2,12 +2,6 @@ # RUN: -target-abi=o32 2>&1 | FileCheck %s .text foo: - .reloc foo+4, R_MIPS_32, .text # CHECK: :[[@LINE]]:9: error: expected non-negative number or a label - .reloc foo+foo, R_MIPS_32, .text # CHECK: :[[@LINE]]:9: error: expected non-negative number or a label .reloc 0, R_MIPS_32, .text+.text # CHECK: :[[@LINE]]:23: error: expression must be relocatable - .reloc 0 R_MIPS_32, .text # CHECK: :[[@LINE]]:11: error: expected comma .reloc 0, 0, R_MIPS_32, .text # CHECK: :[[@LINE]]:12: error: expected relocation name - .reloc -1, R_MIPS_32, .text # CHECK: :[[@LINE]]:9: error: expression is negative - .reloc 1b, R_MIPS_32, .text # CHECK: :[[@LINE]]:9: error: directional label undefined - .reloc 1f, R_MIPS_32, .text # CHECK: :[[@LINE]]:9: error: directional label undefined nop diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s index 5ed6b14d38aea..b144493eb2a9e 100644 
--- a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s +++ b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s @@ -429,3 +429,84 @@ # CHECK-BE: stxvrdx 35, 3, 1 # encoding: [0x7c,0x63,0x09,0xdb] # CHECK-LE: stxvrdx 35, 3, 1 # encoding: [0xdb,0x09,0x63,0x7c] stxvrdx 35, 3, 1 +# CHECK-BE: vmulesd 1, 2, 3 # encoding: [0x10,0x22,0x1b,0xc8] +# CHECK-LE: vmulesd 1, 2, 3 # encoding: [0xc8,0x1b,0x22,0x10] + vmulesd 1, 2, 3 +# CHECK-BE: vmulosd 1, 2, 3 # encoding: [0x10,0x22,0x19,0xc8] +# CHECK-LE: vmulosd 1, 2, 3 # encoding: [0xc8,0x19,0x22,0x10] + vmulosd 1, 2, 3 +# CHECK-BE: vmuleud 1, 2, 3 # encoding: [0x10,0x22,0x1a,0xc8] +# CHECK-LE: vmuleud 1, 2, 3 # encoding: [0xc8,0x1a,0x22,0x10] + vmuleud 1, 2, 3 +# CHECK-BE: vmuloud 1, 2, 3 # encoding: [0x10,0x22,0x18,0xc8] +# CHECK-LE: vmuloud 1, 2, 3 # encoding: [0xc8,0x18,0x22,0x10] + vmuloud 1, 2, 3 +# CHECK-BE: vmsumcud 1, 2, 3, 4 # encoding: [0x10,0x22,0x19,0x17] +# CHECK-LE: vmsumcud 1, 2, 3, 4 # encoding: [0x17,0x19,0x22,0x10] + vmsumcud 1, 2, 3, 4 +# CHECK-BE: vdivsq 3, 4, 5 # encoding: [0x10,0x64,0x29,0x0b] +# CHECK-LE: vdivsq 3, 4, 5 # encoding: [0x0b,0x29,0x64,0x10] + vdivsq 3, 4, 5 +# CHECK-BE: vdivuq 3, 4, 5 # encoding: [0x10,0x64,0x28,0x0b] +# CHECK-LE: vdivuq 3, 4, 5 # encoding: [0x0b,0x28,0x64,0x10] + vdivuq 3, 4, 5 +# CHECK-BE: vdivesq 3, 4, 5 # encoding: [0x10,0x64,0x2b,0x0b] +# CHECK-LE: vdivesq 3, 4, 5 # encoding: [0x0b,0x2b,0x64,0x10] + vdivesq 3, 4, 5 +# CHECK-BE: vdiveuq 3, 4, 5 # encoding: [0x10,0x64,0x2a,0x0b] +# CHECK-LE: vdiveuq 3, 4, 5 # encoding: [0x0b,0x2a,0x64,0x10] + vdiveuq 3, 4, 5 +# CHECK-BE: vcmpequq 4, 5, 6 # encoding: [0x10,0x85,0x31,0xc7] +# CHECK-LE: vcmpequq 4, 5, 6 # encoding: [0xc7,0x31,0x85,0x10] + vcmpequq 4, 5, 6 +# CHECK-BE: vcmpequq. 4, 5, 6 # encoding: [0x10,0x85,0x35,0xc7] +# CHECK-LE: vcmpequq. 4, 5, 6 # encoding: [0xc7,0x35,0x85,0x10] + vcmpequq. 
4, 5, 6 +# CHECK-BE: vcmpgtsq 4, 5, 6 # encoding: [0x10,0x85,0x33,0x87] +# CHECK-LE: vcmpgtsq 4, 5, 6 # encoding: [0x87,0x33,0x85,0x10] + vcmpgtsq 4, 5, 6 +# CHECK-BE: vcmpgtsq. 4, 5, 6 # encoding: [0x10,0x85,0x37,0x87] +# CHECK-LE: vcmpgtsq. 4, 5, 6 # encoding: [0x87,0x37,0x85,0x10] + vcmpgtsq. 4, 5, 6 +# CHECK-BE: vcmpgtuq 4, 5, 6 # encoding: [0x10,0x85,0x32,0x87] +# CHECK-LE: vcmpgtuq 4, 5, 6 # encoding: [0x87,0x32,0x85,0x10] + vcmpgtuq 4, 5, 6 +# CHECK-BE: vcmpgtuq. 4, 5, 6 # encoding: [0x10,0x85,0x36,0x87] +# CHECK-LE: vcmpgtuq. 4, 5, 6 # encoding: [0x87,0x36,0x85,0x10] + vcmpgtuq. 4, 5, 6 +# CHECK-BE: vmoduq 3, 4, 5 # encoding: [0x10,0x64,0x2e,0x0b] +# CHECK-LE: vmoduq 3, 4, 5 # encoding: [0x0b,0x2e,0x64,0x10] + vmoduq 3, 4, 5 +# CHECK-BE: vextsd2q 20, 25 # encoding: [0x12,0x9b,0xce,0x02] +# CHECK-LE: vextsd2q 20, 25 # encoding: [0x02,0xce,0x9b,0x12] + vextsd2q 20, 25 +# CHECK-BE: vrlq 4, 5, 6 # encoding: [0x10,0x85,0x30,0x05] +# CHECK-LE: vrlq 4, 5, 6 # encoding: [0x05,0x30,0x85,0x10] + vrlq 4, 5, 6 +# CHECK-BE: vrlqnm 4, 5, 6 # encoding: [0x10,0x85,0x31,0x45] +# CHECK-LE: vrlqnm 4, 5, 6 # encoding: [0x45,0x31,0x85,0x10] + vrlqnm 4, 5, 6 +# CHECK-BE: vrlqmi 4, 5, 6 # encoding: [0x10,0x85,0x30,0x45] +# CHECK-LE: vrlqmi 4, 5, 6 # encoding: [0x45,0x30,0x85,0x10] + vrlqmi 4, 5, 6 +# CHECK-BE: vslq 4, 5, 6 # encoding: [0x10,0x85,0x31,0x05] +# CHECK-LE: vslq 4, 5, 6 # encoding: [0x05,0x31,0x85,0x10] + vslq 4, 5, 6 +# CHECK-BE: vsrq 4, 5, 6 # encoding: [0x10,0x85,0x32,0x05] +# CHECK-LE: vsrq 4, 5, 6 # encoding: [0x05,0x32,0x85,0x10] + vsrq 4, 5, 6 +# CHECK-BE: vsraq 4, 5, 6 # encoding: [0x10,0x85,0x33,0x05] +# CHECK-LE: vsraq 4, 5, 6 # encoding: [0x05,0x33,0x85,0x10] + vsraq 4, 5, 6 +# CHECK-BE: xscvqpuqz 8, 28 # encoding: [0xfd,0x00,0xe6,0x88] +# CHECK-LE: xscvqpuqz 8, 28 # encoding: [0x88,0xe6,0x00,0xfd] + xscvqpuqz 8, 28 +# CHECK-BE: xscvqpsqz 8, 28 # encoding: [0xfd,0x08,0xe6,0x88] +# CHECK-LE: xscvqpsqz 8, 28 # encoding: [0x88,0xe6,0x08,0xfd] + xscvqpsqz 8, 28 
+# CHECK-BE: xscvuqqp 8, 28 # encoding: [0xfd,0x03,0xe6,0x88] +# CHECK-LE: xscvuqqp 8, 28 # encoding: [0x88,0xe6,0x03,0xfd] + xscvuqqp 8, 28 +# CHECK-BE: xscvsqqp 8, 28 # encoding: [0xfd,0x0b,0xe6,0x88] +# CHECK-LE: xscvsqqp 8, 28 # encoding: [0x88,0xe6,0x0b,0xfd] + xscvsqqp 8, 28 diff --git a/llvm/test/MC/X86/I286-32.s b/llvm/test/MC/X86/I286-32.s index 0d463669f34ad..648de019127f9 100644 --- a/llvm/test/MC/X86/I286-32.s +++ b/llvm/test/MC/X86/I286-32.s @@ -24,7 +24,7 @@ larl 485498096(%edx), %eax // CHECK: encoding: [0x0f,0x02,0x44,0x02,0x40] larl 64(%edx,%eax), %eax -// CHECK: larl %eax, %eax +// CHECK: larl %ax, %eax // CHECK: encoding: [0x0f,0x02,0xc0] larl %eax, %eax @@ -100,7 +100,7 @@ lsll 485498096(%edx), %eax // CHECK: encoding: [0x0f,0x03,0x44,0x02,0x40] lsll 64(%edx,%eax), %eax -// CHECK: lsll %eax, %eax +// CHECK: lsll %ax, %eax // CHECK: encoding: [0x0f,0x03,0xc0] lsll %eax, %eax diff --git a/llvm/test/MC/X86/I286-64.s b/llvm/test/MC/X86/I286-64.s index 73376de978875..7707d7ba4d587 100644 --- a/llvm/test/MC/X86/I286-64.s +++ b/llvm/test/MC/X86/I286-64.s @@ -24,7 +24,7 @@ larl -64(%rdx,%rax,4), %r13d // CHECK: encoding: [0x44,0x0f,0x02,0x6c,0x02,0x40] larl 64(%rdx,%rax), %r13d -// CHECK: larl %r13d, %r13d +// CHECK: larl %r13w, %r13d // CHECK: encoding: [0x45,0x0f,0x02,0xed] larl %r13d, %r13d @@ -32,6 +32,14 @@ larl %r13d, %r13d // CHECK: encoding: [0x44,0x0f,0x02,0x2a] larl (%rdx), %r13d +// CHECK: larq %ax, %rax +// CHECK: encoding: [0x48,0x0f,0x02,0xc0] +lar %ax, %rax + +// CHECK: larq %ax, %rax +// CHECK: encoding: [0x48,0x0f,0x02,0xc0] +lar %rax, %rax + // CHECK: lgdtq 485498096 // CHECK: encoding: [0x0f,0x01,0x14,0x25,0xf0,0x1c,0xf0,0x1c] lgdtq 485498096 @@ -156,7 +164,7 @@ lsll -64(%rdx,%rax,4), %r13d // CHECK: encoding: [0x44,0x0f,0x03,0x6c,0x02,0x40] lsll 64(%rdx,%rax), %r13d -// CHECK: lsll %r13d, %r13d +// CHECK: lsll %r13w, %r13d // CHECK: encoding: [0x45,0x0f,0x03,0xed] lsll %r13d, %r13d @@ -164,6 +172,14 @@ lsll %r13d, %r13d // CHECK: 
encoding: [0x44,0x0f,0x03,0x2a] lsll (%rdx), %r13d +// CHECK: lslq %ax, %rax +// CHECK: encoding: [0x48,0x0f,0x03,0xc0] +lsl %ax, %rax + +// CHECK: lslq %ax, %rax +// CHECK: encoding: [0x48,0x0f,0x03,0xc0] +lsl %rax, %rax + // CHECK: ltrw 485498096 // CHECK: encoding: [0x0f,0x00,0x1c,0x25,0xf0,0x1c,0xf0,0x1c] ltrw 485498096 diff --git a/llvm/test/Object/Inputs/invalid-phdr.elf b/llvm/test/Object/Inputs/invalid-phdr.elf deleted file mode 100644 index 8a5cc53cc94bd..0000000000000 Binary files a/llvm/test/Object/Inputs/invalid-phdr.elf and /dev/null differ diff --git a/llvm/test/Object/elf-invalid-phdr.test b/llvm/test/Object/elf-invalid-phdr.test deleted file mode 100644 index 1b47f8d66cc41..0000000000000 --- a/llvm/test/Object/elf-invalid-phdr.test +++ /dev/null @@ -1,26 +0,0 @@ -# invalid-phdr.elf is generated by creating a simple elf file with yaml2obj: -# !ELF -# FileHeader: -# Class: ELFCLASS64 -# Data: ELFDATA2LSB -# Type: ET_EXEC -# Machine: EM_X86_64 -# Sections: -# - Name: .text -# Type: SHT_PROGBITS -# Flags: [ SHF_ALLOC, SHF_EXECINSTR ] -# AddressAlign: 0x0000000000001000 -# Content: "00000000" -# ProgramHeaders: -# - Type: PT_LOAD -# Flags: [ PF_X, PF_R ] -# VAddr: 0xAAAA1000 -# PAddr: 0xFFFF1000 -# Sections: -# - Section: .text -# -# Then editing the e_phoff in with a hexeditor to set it to 0xffffff -RUN: not --crash llvm-objdump --private-headers %p/Inputs/invalid-phdr.elf 2>&1 \ -RUN: | FileCheck %s - -CHECK: LLVM ERROR: program headers are longer than binary of size 4162: e_phoff = 0xffffff, e_phnum = 1, e_phentsize = 56 diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test index 3a7ddab8d043a..a930a6a9dbe68 100644 --- a/llvm/test/Object/invalid.test +++ b/llvm/test/Object/invalid.test @@ -181,11 +181,11 @@ Sections: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SHEntSize: 1 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + EShEntSize: 1 ## Check that 
llvm-readobj reports a warning if .symtab has sh_size ## that is not a multiple of sh_entsize. @@ -228,12 +228,12 @@ Sections: Link: .symtab Symbols: [] -## Check that llvm-readobj reports an error if the e_phentsize field is broken. +## Check that llvm-readobj reports a warning when the e_phentsize field is broken. -# RUN: not llvm-readobj --program-headers %p/Inputs/invalid-e_shnum.elf 2>&1 | \ +# RUN: llvm-readobj --program-headers %p/Inputs/invalid-e_shnum.elf 2>&1 | \ # RUN: FileCheck -DFILE=%p/Inputs/invalid-e_shnum.elf --check-prefix=INVALID-PH-ENTSIZE %s -# INVALID-PH-ENTSIZE: error: '[[FILE]]': invalid e_phentsize: 12336 +# INVALID-PH-ENTSIZE: warning: '[[FILE]]': unable to dump program headers: invalid e_phentsize: 12336 ## Check that llvm-readobj reports a warning when we have no SHT_SYMTAB_SHNDX section, ## but have a symbol referencing it. @@ -306,11 +306,11 @@ Symbols: [] --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SHNum: 0xFF + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + EShNum: 0xFF ## Check llvm-readobj does not crash on a truncated ELF. @@ -409,10 +409,10 @@ DynamicSymbols: ## ELF header contains e_phentsize field with a value != sizeof(Elf_Phdr). ## Check llvm-readobj reports it. -# RUN: not llvm-readobj -l %p/Inputs/corrupt-invalid-phentsize.elf.x86-64 2>&1 \ +# RUN: llvm-readobj -l %p/Inputs/corrupt-invalid-phentsize.elf.x86-64 2>&1 \ # RUN: | FileCheck -DFILE=%p/Inputs/corrupt-invalid-phentsize.elf.x86-64 --check-prefix=PHENTSIZE %s -# PHENTSIZE: error: '[[FILE]]': invalid e_phentsize: 57 +# PHENTSIZE: warning: '[[FILE]]': unable to read program headers to locate the PT_DYNAMIC segment: invalid e_phentsize: 57 ## The dynamic table contains DT_STRTAB with a value that is not in any loadable segment. ## Check llvm-readobj reports it. 
@@ -521,11 +521,11 @@ ProgramHeaders: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SHStrNdx: 0xFF + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + EShStrNdx: 0xFF Sections: - Name: .foo Type: SHT_PROGBITS @@ -541,11 +541,11 @@ Sections: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SHNum: 0x0 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + EShNum: 0x0 Sections: - Type: SHT_NULL Size: 288230376151711743 @@ -560,11 +560,11 @@ Sections: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SHNum: 0x0 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + EShNum: 0x0 Sections: - Type: SHT_NULL Size: 288230376151711744 @@ -578,11 +578,11 @@ Sections: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SHOff: 0xffffffffffffffff + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + EShOff: 0xffffffffffffffff ## Check that llvm-objdump reports an error when it tries to dump a ## symbol name and .strtab is empty. @@ -641,12 +641,26 @@ DynamicSymbols: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 ## SHN_XINDEX == 0xffff. - SHStrNdx: 0xffff + EShStrNdx: 0xffff Sections: - Type: SHT_NULL Link: 0xff + +## Check the case when the e_phoff field is invalid. 
+# RUN: yaml2obj --docnum=31 %s -o %t31 +# RUN: not llvm-objdump --private-headers %t31 2>&1 | FileCheck -DFILE=%t31 %s --check-prefix=INVALID-PHOFF + +# INVALID-PHOFF: error: '[[FILE]]': program headers are longer than binary of size 280: e_phoff = 0xffffff, e_phnum = 0, e_phentsize = 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + EPhOff: 0xffffff diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 29263f633a8da..0283bc8d0a107 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -99,6 +99,7 @@ ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass ; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass> ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O2-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 4a1a96ce64b60..78f9022c741dc 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -79,6 +79,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 2dc90ebb75965..cbb6d8ac081cb 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -48,6 +48,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index f4afe56fd85ad..295a65eb76683 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -59,6 +59,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion +; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll new file mode 100644 index 0000000000000..5e2d2726eb874 --- /dev/null +++ b/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll @@ -0,0 +1,24 @@ +; RUN: opt -O0 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s + +; REQUIRES: asserts + +; CHECK: Pass Arguments: +; CHECK-NEXT: 
Target Transform Information +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (pre inlining) +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Lower the matrix intrinsics + + +define void @f() { + ret void +} diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll new file mode 100644 index 0000000000000..a0b7a8f5e1e3d --- /dev/null +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -0,0 +1,346 @@ +; RUN: opt -O3 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Type-Based Alias Analysis +; CHECK-NEXT: Scoped NoAlias Alias Analysis +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Module Verifier +; CHECK-EXT: Good Bye World Pass +; CHECK-NOEXT-NOT: Good Bye World Pass +; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining) +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: SROA +; CHECK-NEXT: Early CSE +; CHECK-NEXT: Lower 'expect' Intrinsics +; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Target Transform Information +; Target Pass Configuration +; CHECK: Type-Based Alias Analysis +; CHECK-NEXT: Scoped NoAlias Alias Analysis +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: Profile summary info +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Force set function attributes +; CHECK-NEXT: Infer set function attributes +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Call-site splitting +; CHECK-NEXT: Interprocedural Sparse Conditional Constant Propagation +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Called Value Propagation +; CHECK-NEXT: Global Variable Optimizer +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Promote Memory to Register +; CHECK-NEXT: Dead Argument Elimination +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: CallGraph Construction +; CHECK-NEXT: Globals Alias Analysis +; CHECK-NEXT: Call Graph SCC Pass Manager +; CHECK-NEXT: Remove unused exception handling info +; CHECK-NEXT: 
Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations +; CHECK-NEXT: Deduce function attributes +; CHECK-NEXT: Promote 'by reference' arguments to scalars +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: SROA +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Lazy Value Information Analysis +; CHECK-NEXT: Jump Threading +; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Combine pattern based expressions +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Conditionally eliminate dead library calls +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: PGOMemOPSize +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Tail Call Elimination +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Reassociate expressions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; 
CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Rotate Loops +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Unswitch loops +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Induction Variable Simplification +; CHECK-NEXT: Recognize loop idioms +; CHECK-NEXT: Delete dead loops +; CHECK-NEXT: Unroll loops +; CHECK-NEXT: MergedLoadStoreMotion +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory Dependence Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory Dependence Analysis +; CHECK-NEXT: MemCpy Optimization +; CHECK-NEXT: Sparse Conditional Constant Propagation +; CHECK-NEXT: Demanded bits analysis +; CHECK-NEXT: Bit-Tracking Dead Code Elimination +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block 
Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Lazy Value Information Analysis +; CHECK-NEXT: Jump Threading +; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Phi Values Analysis +; CHECK-NEXT: Memory Dependence Analysis +; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Aggressive Dead Code Elimination +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: Eliminate Available Externally Globals +; CHECK-NEXT: CallGraph Construction +; CHECK-NEXT: Deduce function attributes in RPO +; CHECK-NEXT: Global Variable Optimizer +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Dead Global Elimination +; CHECK-NEXT: CallGraph Construction +; CHECK-NEXT: Globals Alias Analysis +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Float to int +; CHECK-NEXT: Lower constant 
intrinsics +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Lower the matrix intrinsics +; CHECK-NEXT: Early CSE +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Rotate Loops +; CHECK-NEXT: Loop Access Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Loop Distribution +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Loop Access Analysis +; CHECK-NEXT: Demanded bits analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Inject TLI Mappings +; CHECK-NEXT: Loop Vectorization +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Loop Access Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Loop Load Elimination +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization 
Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Demanded bits analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Inject TLI Mappings +; CHECK-NEXT: SLP Vectorizer +; CHECK-NEXT: Optimize scalar/vector ops +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Unroll loops +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Memory SSA +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Warn about non-applied transformations +; CHECK-NEXT: Alignment from assumptions +; CHECK-NEXT: Strip Unused Function Prototypes +; CHECK-NEXT: Dead Global Elimination +; CHECK-NEXT: Merge Duplicate Global Constants +; CHECK-NEXT: Call Graph Profile +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: FunctionPass Manager +; 
CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Sink +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Remove redundant instructions +; CHECK-NEXT: Hoist/decompose integer division and remainder +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Bitcode Writer +; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis + +define void @f() { + ret void +} diff --git a/llvm/test/Other/pass-pipeline-parsing.ll b/llvm/test/Other/pass-pipeline-parsing.ll index 2e8bc7c873025..902bd9b3eabb1 100644 --- a/llvm/test/Other/pass-pipeline-parsing.ll +++ b/llvm/test/Other/pass-pipeline-parsing.ll @@ -173,6 +173,37 @@ ; CHECK-NESTED-FP-LP: Finished llvm::Function pass 
manager run ; CHECK-NESTED-FP-LP: Finished llvm::Module pass manager run +; RUN: opt -disable-output -debug-pass-manager \ +; RUN: -passes='module(no-op-function,no-op-loop,no-op-cgscc,cgscc(no-op-function,no-op-loop),function(no-op-loop))' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-ADAPTORS +; CHECK-ADAPTORS: Starting llvm::Module pass manager run +; CHECK-ADAPTORS: Starting llvm::Module pass manager run +; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> +; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> +; CHECK-ADAPTORS: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}NoOpCGSCCPass> +; CHECK-ADAPTORS: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> +; CHECK-ADAPTORS: Starting CGSCC pass manager run +; CHECK-ADAPTORS: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> +; CHECK-ADAPTORS: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> +; CHECK-ADAPTORS: Finished CGSCC pass manager run +; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-ADAPTORS: Starting llvm::Function pass manager run +; CHECK-ADAPTORS: Running pass: FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass> +; CHECK-ADAPTORS: Finished llvm::Function pass manager run +; CHECK-ADAPTORS: Finished llvm::Module pass manager run +; CHECK-ADAPTORS: Finished llvm::Module pass manager run + +; RUN: opt -disable-output -debug-pass-manager \ +; RUN: -passes='cgscc(print)' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-PRINT-IN-CGSCC +; CHECK-PRINT-IN-CGSCC: Starting llvm::Module pass manager run +; CHECK-PRINT-IN-CGSCC: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> +; CHECK-PRINT-IN-CGSCC: Starting CGSCC pass manager run +; CHECK-PRINT-IN-CGSCC: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PrintFunctionPass> +; 
CHECK-PRINT-IN-CGSCC: Finished CGSCC pass manager run +; CHECK-PRINT-IN-CGSCC: Running pass: VerifierPass +; CHECK-PRINT-IN-CGSCC: Finished llvm::Module pass manager run + ; RUN: not opt -disable-output -debug-pass-manager \ ; RUN: -passes='function(no-op-function)function(no-op-function)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-MISSING-COMMA1 diff --git a/llvm/test/Reduce/remove-args-2.ll b/llvm/test/Reduce/remove-args-2.ll new file mode 100644 index 0000000000000..fddcfc75195cf --- /dev/null +++ b/llvm/test/Reduce/remove-args-2.ll @@ -0,0 +1,20 @@ +; Test that llvm-reduce can remove uninteresting function arguments from function definitions as well as their calls. +; This test checks that functions with different argument types are handled correctly +; +; RUN: llvm-reduce --test %python --test-arg %p/Inputs/remove-args.py %s -o %t +; RUN: cat %t | FileCheck -implicit-check-not=uninteresting %s + +%struct.foo = type { %struct.foo*, i32, i32, i8* } + +define dso_local void @bar() { +entry: + ; CHECK: call void @interesting(%struct.foo* null) + call void @interesting(i32 0, i8* null, %struct.foo* null, i8* null, i64 0) + ret void +} + +; CHECK: define internal void @interesting(%struct.foo* %interesting) { +define internal void @interesting(i32 %uninteresting1, i8* %uninteresting2, %struct.foo* %interesting, i8* %uninteresting3, i64 %uninteresting4) { +entry: + ret void +} diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index 6eb84925db790..ed7bed3f711f0 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter.td @@ -339,12 +339,10 @@ def : Pat<(select GPR32:$src1, (complex_rr GPR32:$src2a, GPR32:$src2b), // R21C-NEXT: // Label [[PREV_NUM]]: @[[PREV]] // R21C-NEXT: GIM_Try, /*On fail goto*//*Label [[LABEL_NUM:[0-9]+]]*/ [[LABEL:[0-9]+]], // Rule ID 21 // // -// R21O-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_frag, // R21O-NEXT: 
GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/MyTarget::GPR32RegClassID, // R21O-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/MyTarget::GPR32RegClassID, // R21N-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/4, // R21N-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_SELECT, -// R21N-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_frag, // R21N-NEXT: // MIs[0] dst // R21N-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, // R21N-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/MyTarget::GPR32RegClassID, @@ -354,10 +352,12 @@ def : Pat<(select GPR32:$src1, (complex_rr GPR32:$src2a, GPR32:$src2b), // R21N-NEXT: // MIs[0] src2 // R21N-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, // +// R21O-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_frag, // R21C-NEXT: GIM_CheckComplexPattern, /*MI*/0, /*Op*/2, /*Renderer*/0, GICP_gi_complex, // R21N-NEXT: // MIs[0] src3 // R21N-NEXT: GIM_CheckType, /*MI*/0, /*Op*/3, /*Type*/GILLT_s32, // R21C-NEXT: GIM_CheckComplexPattern, /*MI*/0, /*Op*/3, /*Renderer*/1, GICP_gi_complex, +// R21N-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_frag, // R21C-NEXT: // (select:{ *:[i32] } GPR32:{ *:[i32] }:$src1, complex:{ *:[i32] }:$src2, complex:{ *:[i32] }:$src3)<> => (INSN2:{ *:[i32] } GPR32:{ *:[i32] }:$src1, complex:{ *:[i32] }:$src3, complex:{ *:[i32] }:$src2) // R21C-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::INSN2, @@ -1120,7 +1120,7 @@ def MUL : I<(outs GPR32:$dst), (ins GPR32:$src2, GPR32:$src1), // NOOPT-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/MyTarget::FPR32RegClassID, // NOOPT-NEXT: // (bitconvert:{ *:[i32] } FPR32:{ *:[f32] }:$src1) => (COPY_TO_REGCLASS:{ *:[i32] } FPR32:{ *:[f32] }:$src1, GPR32:{ *:[i32] }) // NOOPT-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/TargetOpcode::COPY, -// NOOPT-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC GPR32*/1, +// NOOPT-NEXT: 
GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, MyTarget::GPR32RegClassID, // NOOPT-NEXT: // GIR_Coverage, 25, // NOOPT-NEXT: GIR_Done, // NOOPT-NEXT: // Label [[LABEL_NUM]]: @[[LABEL]] diff --git a/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td b/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td new file mode 100644 index 0000000000000..d985ef5da9245 --- /dev/null +++ b/llvm/test/TableGen/GlobalISelEmitterCustomPredicate.td @@ -0,0 +1,148 @@ +// RUN: llvm-tblgen %s -gen-global-isel -optimize-match-table=false -I %p/../../include -I %p/Common -o - | FileCheck %s + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + +// Boilerplate code for setting up some registers with subregs. +class MyReg subregs = []> + : Register { + let SubRegs = subregs; +} + +class MyClass types, dag registers> + : RegisterClass<"Test", types, size, registers> { + let Size = size; +} + +def sub0 : SubRegIndex<16>; +def sub1 : SubRegIndex<16, 16>; +def S0 : MyReg<"s0">; +def S1 : MyReg<"s1">; +def SRegs : MyClass<16, [i16], (sequence "S%u", 0, 1)>; + +let SubRegIndices = [sub0, sub1] in { +def D0 : MyReg<"d0", [S0, S1]>; +} + +def DRegs : MyClass<32, [i32], (sequence "D%u", 0, 0)>; +def DOP : RegisterOperand; +def AND_OR : I<(outs DRegs:$dst), (ins DOP:$src0, DOP:$src1, DOP:$src2), []>; + + +def or_oneuse : PatFrag< + (ops node:$x, node:$y), + (or node:$x, node:$y), [{ return foo(); }]> { + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} + + +// FIXME: GISelPredicateCode ignored if DAG predicate not set. 
+def and_or_pat : PatFrag< + (ops node:$x, node:$y, node:$z), + (and (or node:$x, node:$y), node:$z), [{ return foo(); }]> { + let GISelPredicateCode = [{ + return doesComplexCheck(MI); + }]; +} + +// CHECK: GIM_Try, /*On fail goto*//*Label 0*/ {{[0-9]+}}, // Rule ID 1 // +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_AND, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] Operand 1 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/1, // MIs[1] +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/1, /*Expected*/3, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_OR, +// CHECK-NEXT: // MIs[1] Operand 0 +// CHECK-NEXT:GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: // MIs[1] src0 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[1] src1 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/2, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] src2 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/2, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_and_or_pat, +// CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, +// CHECK-NEXT: // (and:{ *:[i32] } (or:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1), DOP:{ *:[i32] }:$src2)<> => (AND_OR:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2) +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::AND_OR, + + +// 
CHECK: GIM_Try, /*On fail goto*//*Label 1*/ {{[0-9]+}}, // Rule ID 2 // +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_AND, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] src2 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] Operand 2 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/2, // MIs[1] +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/1, /*Expected*/3, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_OR, +// CHECK-NEXT: // MIs[1] Operand 0 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: // MIs[1] src0 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[1] src1 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/2, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_and_or_pat, +// CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, +// CHECK-NEXT: // (and:{ *:[i32] } DOP:{ *:[i32] }:$src2, (or:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1))<> => (AND_OR:{ *:[i32] } DOP:{ *:[i32] }:$src0, DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2) +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::AND_OR, + +// Test commutative, standalone pattern. 
+def : Pat< + (i32 (and_or_pat DOP:$src0, DOP:$src1, DOP:$src2)), + (AND_OR DOP:$src0, DOP:$src1, DOP:$src2) +>; + + +def sub3_pat : PatFrag< + (ops node:$x, node:$y, node:$z), + (sub (sub node:$x, node:$y), node:$z), [{ return foo(); }]> { + let GISelPredicateCode = [{ + return doesComplexCheck(MI); + }]; +} + +// CHECK: GIM_Try, /*On fail goto*//*Label 2*/ {{[0-9]+}}, // Rule ID 0 // +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_SUB, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] Operand 1 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/1, // MIs[1] +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/1, /*Expected*/3, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_SUB, +// CHECK-NEXT: // MIs[1] Operand 0 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: // MIs[1] src0 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/1, /*Type*/GILLT_s32, +// CHECK-NEXT: // MIs[1] src1 +// CHECK-NEXT: GIM_CheckType, /*MI*/1, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: // MIs[0] src2 +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_sub3_pat, +// CHECK-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, +// CHECK-NEXT: // (sub:{ *:[i32] } (sub:{ *:[i32] } i32:{ *:[i32] }:$src0, i32:{ *:[i32] }:$src1), i32:{ *:[i32] }:$src2)<> => (SUB3:{ *:[i32] } i32:{ *:[i32] }:$src0, i32:{ *:[i32] }:$src1, i32:{ *:[i32] }:$src2) +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SUB3, + +// Test a non-commutative pattern. 
+def SUB3 : I<(outs DRegs:$dst), + (ins DOP:$src0, DOP:$src1, DOP:$src2), + [(set DRegs:$dst, (sub3_pat i32:$src0, i32:$src1, i32:$src2))] +>; diff --git a/llvm/test/TableGen/GlobalISelEmitterOverloadedPtr.td b/llvm/test/TableGen/GlobalISelEmitterOverloadedPtr.td index 03adab086da77..047af1ca0524f 100644 --- a/llvm/test/TableGen/GlobalISelEmitterOverloadedPtr.td +++ b/llvm/test/TableGen/GlobalISelEmitterOverloadedPtr.td @@ -11,11 +11,13 @@ let TargetPrefix = "mytarget" in { // Ensure that llvm_anyptr_ty on an intrinsic results in a // GIM_CheckPointerToAny rather than a GIM_CheckType. // -// CHECK: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_frag_anyptr, +// CHECK: GIM_CheckIntrinsicID, /*MI*/0, /*Op*/1, Intrinsic::mytarget_anyptr, +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/MyTarget::GPR32RegClassID, // CHECK-NEXT: // MIs[0] src // CHECK-NEXT: GIM_CheckPointerToAny, /*MI*/0, /*Op*/2, /*SizeInBits*/32, // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/2, /*RC*/MyTarget::GPR32RegClassID, +// CHECK-NEXT: GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIPFP_MI_Predicate_frag_anyptr, // CHECK-NEXT: // (intrinsic_w_chain:{ *:[i32] } {{[0-9]+}}:{ *:[iPTR] }, GPR32:{ *:[i32] }:$src)<> => (ANYLOAD:{ *:[i32] } GPR32:{ *:[i32] }:$src) // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::ANYLOAD, let hasSideEffects = 1 in { diff --git a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td index 6556bc3cdf29d..1b7391497f125 100644 --- a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td +++ b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td @@ -56,9 +56,9 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>; // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, 
-// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, Test::SRegsRegClassID, def : Pat<(i32 (sext SOP:$src)), (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0, (SUBSOME_INSN SOP:$src), sub1)>; @@ -71,9 +71,9 @@ def : Pat<(i32 (sext SOP:$src)), // CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/1, // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/2, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/2, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, Test::SRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN, // Make sure operands are constrained when REG_SEQUENCE isn't the root instruction. 
def : Pat<(i32 (zext SOP:$src)), diff --git a/llvm/test/TableGen/GlobalISelEmitterSubreg.td b/llvm/test/TableGen/GlobalISelEmitterSubreg.td index aae996e8e2242..e8dc4a9ac4a07 100644 --- a/llvm/test/TableGen/GlobalISelEmitterSubreg.td +++ b/llvm/test/TableGen/GlobalISelEmitterSubreg.td @@ -57,9 +57,9 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SOP:$src // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*Imm*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, // Test that we can import INSERT_SUBREG when it is a subinstruction of another @@ -76,9 +76,9 @@ def : Pat<(i32 (anyext i16:$src)), (SOME_INSN (INSERT_SUBREG (i32 (IMPLICIT_DEF) // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src // CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*Imm*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/2, Test::SRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN, // 
CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, @@ -92,9 +92,9 @@ def : Pat<(i32 (anyext i16:$src)), (SOME_INSN (INSERT_SUBREG (i32 (IMPLICIT_DEF) def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (COPY_TO_REGCLASS SOP:$src, ERegs)), SOP:$src, sub0)>; // CHECK-LABEL: (anyext:{ *:[i32] } i16:{ *:[i16] }:$src) => (INSERT_SUBREG:{ *:[i32] } (COPY_TO_REGCLASS:{ *:[i32] } SOP:{ *:[i16] }:$src, ERegs:{ *:[i32] }), SOP:{ *:[i16] }:$src, sub0:{ *:[i32] }) // CHECK: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::INSERT_SUBREG, -// CHECK-DAG: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC ERegs*/2, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC ERegs*/2, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-DAG: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::ERegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::ERegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, // Test that we can import INSERT_SUBREG when its subregister source is defined // by a subinstruction. 
@@ -115,9 +115,9 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (SUBSOME // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*Imm*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, // Test an EXTRACT_SUBREG that is a sub instruction. The individual // operands should be constrained to specific register classes, and @@ -129,8 +129,8 @@ def : Pat<(i16 (trunc (not DOP:$src))), // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, // CHECK-NEXT: GIR_CopySubReg, /*NewInsnID*/1, /*OldInsnID*/1, /*OpIdx*/1, /*SubRegIdx*/1, // src -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::DRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SUBSOME_INSN, // Test an extract from an output instruction result (nonleaf) @@ -150,8 +150,8 @@ def : Pat<(i16 (trunc (bitreverse DOP:$src))), // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, sub0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: 
GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID, // EXTRACT_SUBREG is subinstruction, but also doesn't have a leaf input @@ -169,8 +169,8 @@ def : Pat<(i16 (trunc (bitreverse DOP:$src))), // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, // CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0, sub0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::DRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SUBSOME_INSN2, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, @@ -187,8 +187,8 @@ def : Pat<(i16 (trunc DOP:$src)), // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_CopySubReg, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, /*SubRegIdx*/1, // src // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID, // Test that we can import SUBREG_TO_REG @@ -206,5 +206,5 @@ def : Pat<(i32 (zext SOP:$src)), // CHECK-NEXT: 
GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*Imm*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, diff --git a/llvm/test/TableGen/RegisterClass.td b/llvm/test/TableGen/RegisterClass.td new file mode 100644 index 0000000000000..d81c2df453092 --- /dev/null +++ b/llvm/test/TableGen/RegisterClass.td @@ -0,0 +1,7 @@ +// RUN: not llvm-tblgen -gen-register-bank -I %p/../../include %s 2>&1 | FileCheck %s + +include "llvm/Target/Target.td" + +def MyTarget : Target; +def R0 : Register<"r0">; +def ClassA : RegisterClass<"MyTarget", [], 32, (add R0)>; // CHECK: [[@LINE]]:1: error: RegTypes list must not be empty! 
diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8b3cc8702bd49..49df4e67b53dc 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -1,5 +1,6 @@ // RUN: llvm-tblgen -gen-directive-decl -I %p/../../include %s | FileCheck -match-full-lines %s // RUN: llvm-tblgen -gen-directive-impl -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=IMPL +// RUN: llvm-tblgen -gen-directive-gen -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=GEN include "llvm/Frontend/Directive/DirectiveBase.td" @@ -74,6 +75,7 @@ def TDL_DirA : Directive<"dira"> { // IMPL: #include "llvm/ADT/StringRef.h" // IMPL-NEXT: #include "llvm/ADT/StringSwitch.h" +// IMPL-NEXT: #include "llvm/Support/ErrorHandling.h" // IMPL-EMPTY: // IMPL-NEXT: using namespace llvm; // IMPL-NEXT: using namespace tdl; @@ -126,3 +128,57 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: } // IMPL-NEXT: llvm_unreachable("Invalid Tdl Directive kind"); // IMPL-NEXT: } +// IMPL-EMPTY: + + + +// GEN: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: namespace llvm { +// GEN-NEXT: namespace tdl { +// GEN-EMPTY: +// GEN-NEXT: // Sets for dira +// GEN-EMPTY: +// GEN-NEXT: static allowedClauses_TDLD_dira { +// GEN-NEXT: llvm::tdl::Clause::TDLC_clausea, +// GEN-NEXT: llvm::tdl::Clause::TDLC_clauseb, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedOnceClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedExclusiveClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static requiredClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-NEXT: } // namespace tdl +// GEN-NEXT: } // namespace llvm +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-EMPTY: +// GEN-NEXT: struct 
TdlDirectiveClauses { +// GEN-NEXT: const allowed; +// GEN-NEXT: const allowedOnce; +// GEN-NEXT: const allowedExclusive; +// GEN-NEXT: const requiredOneOf; +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: std::unordered_map +// GEN-NEXT: directiveClausesTable = { +// GEN-NEXT: {llvm::tdl::Directive::TDLD_dira, +// GEN-NEXT: { +// GEN-NEXT: llvm::tdl::allowedClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedOnceClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedExclusiveClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::requiredClauses_TDLD_dira, +// GEN-NEXT: } +// GEN-NEXT: }, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_MAP + diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index 06c7aabcf3adc..e585e11496ef5 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -1,5 +1,6 @@ // RUN: llvm-tblgen -gen-directive-decl -I %p/../../include %s | FileCheck -match-full-lines %s // RUN: llvm-tblgen -gen-directive-impl -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=IMPL +// RUN: llvm-tblgen -gen-directive-gen -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=GEN include "llvm/Frontend/Directive/DirectiveBase.td" @@ -67,11 +68,12 @@ def TDL_DirA : Directive<"dira"> { // IMPL-EMPTY: // IMPL-NEXT: #include "llvm/ADT/StringRef.h" // IMPL-NEXT: #include "llvm/ADT/StringSwitch.h" +// IMPL-NEXT: #include "llvm/Support/ErrorHandling.h" // IMPL-EMPTY: // IMPL-NEXT: using namespace llvm; // IMPL-NEXT: using namespace tdl; // IMPL-EMPTY: -// IMPL: Directive llvm::tdl::getTdlDirectiveKind(llvm::StringRef Str) { +// IMPL-NEXT: Directive llvm::tdl::getTdlDirectiveKind(llvm::StringRef Str) { // IMPL-NEXT: return llvm::StringSwitch(Str) // IMPL-NEXT: .Case("dira",TDLD_dira) // IMPL-NEXT: .Default(TDLD_dira); @@ -119,3 +121,54 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: } // IMPL-NEXT: llvm_unreachable("Invalid Tdl Directive kind"); // IMPL-NEXT: 
} + + +// GEN: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: namespace llvm { +// GEN-NEXT: namespace tdl { +// GEN-EMPTY: +// GEN-NEXT: // Sets for dira +// GEN-EMPTY: +// GEN-NEXT: static allowedClauses_TDLD_dira { +// GEN-NEXT: llvm::tdl::Clause::TDLC_clausea, +// GEN-NEXT: llvm::tdl::Clause::TDLC_clauseb, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedOnceClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedExclusiveClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static requiredClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-NEXT: } // namespace tdl +// GEN-NEXT: } // namespace llvm +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-EMPTY: +// GEN-NEXT: struct TdlDirectiveClauses { +// GEN-NEXT: const allowed; +// GEN-NEXT: const allowedOnce; +// GEN-NEXT: const allowedExclusive; +// GEN-NEXT: const requiredOneOf; +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: std::unordered_map +// GEN-NEXT: directiveClausesTable = { +// GEN-NEXT: {llvm::tdl::Directive::TDLD_dira, +// GEN-NEXT: { +// GEN-NEXT: llvm::tdl::allowedClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedOnceClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedExclusiveClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::requiredClauses_TDLD_dira, +// GEN-NEXT: } +// GEN-NEXT: }, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_MAP diff --git a/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll b/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll new file mode 100644 index 0000000000000..c44acd32f18c6 --- /dev/null +++ b/llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll @@ -0,0 +1,68 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = 
type { i32 (...)** } +%struct.B = type { i32 (...)** } + +@_ZTV1B = linkonce_odr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1nEi to i8*)] }, !type !0 + +$test = comdat any +$testb = comdat any + +define linkonce_odr i32 @test(%struct.A* %obj, i32 %a) comdat { +entry: + %0 = bitcast %struct.A* %obj to i8** + %vtable5 = load i8*, i8** %0 + + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A") + %2 = extractvalue { i8*, i1 } %1, 1 + br i1 %2, label %cont, label %trap + +trap: + tail call void @llvm.trap() + unreachable + +cont: + %3 = extractvalue { i8*, i1 } %1, 0 + %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* + + %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} + +define linkonce_odr i32 @testb(%struct.A* %obj, i32 %a) comdat { +entry: + %0 = bitcast %struct.A* %obj to i8** + %vtable5 = load i8*, i8** %0 + + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 0, metadata !"_ZTS1A") + %2 = extractvalue { i8*, i1 } %1, 1 + br i1 %2, label %cont, label %trap + +trap: + tail call void @llvm.trap() + unreachable + +cont: + %3 = extractvalue { i8*, i1 } %1, 0 + %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* + + %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) +declare void @llvm.trap() + +define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) { +entry: + ret i32 0 +} +define internal i32 @_ZN1B1nEi(%struct.B* %this, i32 %a) { +entry: + ret i32 0 +} + +!0 = !{i64 16, !"_ZTS1B"} diff --git a/llvm/test/ThinLTO/X86/cfi-unsat.ll b/llvm/test/ThinLTO/X86/cfi-unsat.ll new file mode 100644 index 0000000000000..c22ba8f7ad2b3 --- /dev/null +++ b/llvm/test/ThinLTO/X86/cfi-unsat.ll @@ -0,0 +1,82 @@ +; REQUIRES: x86-registered-target + +; Test CFI devirtualization through the thin link and 
backend when +; a type id is Unsat (not used on any global's type metadata). +; +; In this test case, the first module is split and will import a resolution +; for its type test. The resolution would be exported by the second +; module, which is set up so that it does not get split (treated as regular +; LTO because it does not have any external globals from which to create +; a unique module ID). We should not actually get any resolution for the +; type id in this case, since no globals include it in their type metadata, +; so the resolution is Unsat and the type.checked.load instructions are +; converted to type tests that evaluate to false. + +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %p/Inputs/cfi-unsat.ll + +; RUN: llvm-lto2 run %t.o %t1.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test2,px \ +; RUN: -r=%t1.o,_ZTV1B,px \ +; RUN: -r=%t1.o,test,px \ +; RUN: -r=%t1.o,testb,px +; RUN: llvm-dis %t3.index.bc -o - | FileCheck %s --check-prefix=INDEX +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR0 +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 + +; INDEX-NOT: "typeid:" + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } + +$test2 = comdat any + +define linkonce_odr i32 @test2(%struct.A* %obj, i32 %a) comdat { +entry: + %0 = bitcast %struct.A* %obj to i8** + %vtable5 = load i8*, i8** %0 + + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A") + %2 = extractvalue { i8*, i1 } %1, 1 + br i1 %2, label %cont, label %trap + +trap: + tail call void @llvm.trap() + unreachable + +cont: + %3 = extractvalue { i8*, i1 } %1, 0 + %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* + + %call = tail call i32 %4(%struct.A* nonnull %obj, 
i32 %a) + + ret i32 %call +} + +; CHECK-IR0: define weak_odr i32 @test +; CHECK-IR0-NEXT: entry: +; CHECK-IR0-NEXT: %0 = bitcast +; CHECK-IR0-NEXT: %vtable5 = +; CHECK-IR0-NEXT: tail call void @llvm.trap() +; CHECK-IR0-NEXT: unreachable +; CHECK-IR0-NEXT: } +; CHECK-IR0: define weak_odr i32 @testb +; CHECK-IR0-NEXT: entry: +; CHECK-IR0-NEXT: %0 = bitcast +; CHECK-IR0-NEXT: %vtable5 = +; CHECK-IR0-NEXT: tail call void @llvm.trap() +; CHECK-IR0-NEXT: unreachable +; CHECK-IR0-NEXT: } + +; CHECK-IR1: define weak_odr i32 @test2 +; CHECK-IR1-NEXT: entry: +; CHECK-IR1-NEXT: tail call void @llvm.trap() +; CHECK-IR1-NEXT: unreachable +; CHECK-IR1-NEXT: } + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) +declare void @llvm.trap() diff --git a/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll b/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll index 611a424143ac8..3669db72fa002 100644 --- a/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll +++ b/llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll @@ -33,6 +33,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @g, i8* null }] + %struct.D = type { i32 (...)** } @_ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3 @@ -57,6 +59,23 @@ entry: ; CHECK-IR-LABEL: ret i32 ; CHECK-IR-LABEL: } +; Function Attrs: inlinehint nounwind uwtable +define internal void @_ZN1DC2Ev(%struct.D* %this) unnamed_addr align 2 { +entry: + %this.addr = alloca %struct.D*, align 8 + store %struct.D* %this, %struct.D** %this.addr, align 8 + %this1 = load %struct.D*, %struct.D** %this.addr + %0 = bitcast %struct.D* %this1 to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1D, i64 0, 
inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define internal void @g() section ".text.startup" { + %d = alloca %struct.D, align 8 + call void @_ZN1DC2Ev(%struct.D* %d) + ret void +} + declare i1 @llvm.type.test(i8*, metadata) declare void @llvm.assume(i1) diff --git a/llvm/test/ThinLTO/X86/type_test_noindircall.ll b/llvm/test/ThinLTO/X86/type_test_noindircall.ll new file mode 100644 index 0000000000000..3a2badcaea693 --- /dev/null +++ b/llvm/test/ThinLTO/X86/type_test_noindircall.ll @@ -0,0 +1,59 @@ +; Test to ensure that we correctly handle a type test not used for a virtual call. +; If it isn't removed correctly by WPD then we could incorrectly get an Unsat +; (resulting in an unreachable in the IR). + +; REQUIRES: x86-registered-target + +; RUN: opt -thinlto-bc -o %t.o %s + +; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. +; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -r=%t.o,_ZTVN12_GLOBAL__N_18RealFileE,px \ +; RUN: -o %t2 +; RUN: llvm-dis %t2.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Try again without LTO unit splitting. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t3.o %s +; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. 
\ +; RUN: -whole-program-visibility \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -r=%t.o,_ZTVN12_GLOBAL__N_18RealFileE,px \ +; RUN: -o %t4 +; RUN: llvm-dis %t4.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%"class.llvm::vfs::File" = type { i32 (...)** } +%"class.llvm::vfs::Status" = type <{ %"class.std::__cxx11::basic_string", %"class.llvm::sys::fs::UniqueID", %"struct.std::chrono::time_point", i32, i32, i64, i32, i32, i8, [7 x i8] }> +%"class.std::__cxx11::basic_string" = type { %"struct.std::__cxx11::basic_string, std::allocator >::_Alloc_hider", i64, %union.anon } +%"struct.std::__cxx11::basic_string, std::allocator >::_Alloc_hider" = type { i8* } +%union.anon = type { i64, [8 x i8] } +%"class.llvm::sys::fs::UniqueID" = type { i64, i64 } +%"struct.std::chrono::time_point" = type { %"struct.std::chrono::duration" } +%"struct.std::chrono::duration" = type { i64 } +%"class.(anonymous namespace)::RealFile" = type { %"class.llvm::vfs::File", i32, [4 x i8], %"class.llvm::vfs::Status", %"class.std::__cxx11::basic_string" } + +@_ZTVN12_GLOBAL__N_18RealFileE = unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.(anonymous namespace)::RealFile"*)* @_ZN12_GLOBAL__N_18RealFileD2Ev to i8*)] }, align 8, !type !74 + +define internal void @_ZN12_GLOBAL__N_18RealFileD2Ev(%"class.(anonymous namespace)::RealFile"* %this) unnamed_addr #0 align 2 { +entry: +; CHECK-IR: %0 = getelementptr + %0 = getelementptr %"class.(anonymous namespace)::RealFile", %"class.(anonymous namespace)::RealFile"* %this, i64 0, i32 0, i32 0 +; CHECK-IR-NEXT: store + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_18RealFileE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + %1 = tail call i1 @llvm.type.test(i8* bitcast (i8** 
getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_18RealFileE, i64 0, inrange i32 0, i64 2) to i8*), metadata !"4$09c6cc733fc6accb91e5d7b87cb48f2d") + tail call void @llvm.assume(i1 %1) +; CHECK-IR-NEXT: ret void + ret void +} + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!74 = !{i64 16, !"4$09c6cc733fc6accb91e5d7b87cb48f2d"} diff --git a/llvm/test/Transforms/Attributor/allow_list.ll b/llvm/test/Transforms/Attributor/allow_list.ll new file mode 100644 index 0000000000000..7670090cb03b7 --- /dev/null +++ b/llvm/test/Transforms/Attributor/allow_list.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -passes=attributor --attributor-seed-allow-list asd < %s | FileCheck %s --check-prefixes=CHECK_DISABLED +; RUN: opt -S -passes=attributor --attributor-seed-allow-list AAValueSimplify < %s | FileCheck %s --check-prefixes=CHECK_ENABLED + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define internal i32 @range_test(i32 %a) #0 { +; CHECK_DISABLED-LABEL: define {{[^@]+}}@range_test +; CHECK_DISABLED-SAME: (i32 [[A:%.*]]) +; CHECK_DISABLED-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A]], 100 +; CHECK_DISABLED-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK_DISABLED-NEXT: ret i32 [[TMP2]] +; + %1 = icmp sgt i32 %a, 100 + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +; Function Attrs: nounwind uwtable +define i32 @range_use() #0 { +; CHECK_DISABLED-LABEL: define {{[^@]+}}@range_use() +; CHECK_DISABLED-NEXT: [[TMP1:%.*]] = call i32 @range_test(i32 123) +; CHECK_DISABLED-NEXT: ret i32 [[TMP1]] +; +; CHECK_ENABLED-LABEL: define {{[^@]+}}@range_use() +; CHECK_ENABLED-NEXT: ret i32 1 +; + %1 = call i32 @range_test(i32 123) + ret i32 %1 +} + +attributes #0 = { 
nounwind uwtable noinline } \ No newline at end of file diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index 03338b4ce4999..f105bb3fad0e1 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -1063,6 +1063,71 @@ end: } +define i32 @func(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@func +; CHECK-SAME: (i1 [[C:%.*]]) +; CHECK-NEXT: [[RET:%.*]] = select i1 [[C]], i32 0, i32 1 +; CHECK-NEXT: ret i32 [[RET]] +; + %ret = select i1 %c, i32 0, i32 1 + ret i32 %ret +} + +define i32 @simplify_callsite_argument(i1 %d) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__TUNIT_OPM-SAME: (i1 [[D:%.*]]) +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__TUNIT_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT_OPM: t: +; IS__TUNIT_OPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 [[C]]) #2, !range !3 +; IS__TUNIT_OPM-NEXT: ret i32 [[RET1]] +; IS__TUNIT_OPM: f: +; IS__TUNIT_OPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) #2, !range !3 +; IS__TUNIT_OPM-NEXT: ret i32 [[RET2]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__TUNIT_NPM-SAME: (i1 [[D:%.*]]) +; IS__TUNIT_NPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__TUNIT_NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT_NPM: t: +; IS__TUNIT_NPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 true) #1, !range !4 +; IS__TUNIT_NPM-NEXT: ret i32 [[RET1]] +; IS__TUNIT_NPM: f: +; IS__TUNIT_NPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) #1, !range !4 +; IS__TUNIT_NPM-NEXT: ret i32 [[RET2]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__CGSCC_OPM-SAME: (i1 [[D:%.*]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC_OPM: t: +; IS__CGSCC_OPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 [[C]]) +; 
IS__CGSCC_OPM-NEXT: ret i32 [[RET1]] +; IS__CGSCC_OPM: f: +; IS__CGSCC_OPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) +; IS__CGSCC_OPM-NEXT: ret i32 [[RET2]] +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__CGSCC_NPM-SAME: (i1 [[D:%.*]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC_NPM: t: +; IS__CGSCC_NPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 true) +; IS__CGSCC_NPM-NEXT: ret i32 [[RET1]] +; IS__CGSCC_NPM: f: +; IS__CGSCC_NPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) +; IS__CGSCC_NPM-NEXT: ret i32 [[RET2]] +; + %c = select i1 %d, i1 true, i1 false + br i1 %c, label %t, label %f +t: + %ret1 = call i32 @func(i1 %c) + ret i32 %ret1 +f: + %ret2 = call i32 @func(i1 false) + ret i32 %ret2 +} + !0 = !{i32 0, i32 10} !1 = !{i32 10, i32 100} diff --git a/llvm/test/Transforms/CallSiteSplitting/callsite-split.ll b/llvm/test/Transforms/CallSiteSplitting/callsite-split.ll index 117464904ceb4..4147720dcb459 100644 --- a/llvm/test/Transforms/CallSiteSplitting/callsite-split.ll +++ b/llvm/test/Transforms/CallSiteSplitting/callsite-split.ll @@ -74,8 +74,8 @@ declare void @dummy1(%struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct. 
;CHECK-LABEL: NextCond.split: ;CHECK: call void @dummy3() ;CheCK-LABEL: CallSiteBB: -;CHECK: %phi.call = phi i1 [ true, %NextCond.split ], [ false, %Top.split ] -;CHECK: call void @foo(i1 %phi.call) +;CHECK: [[NEG:%.*]] = xor i1 %tobool1, true +;CHECK: call void @foo(i1 [[NEG]]) define void @caller2(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, %struct.bitmap* %c_elt) { entry: br label %Top diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll new file mode 100644 index 0000000000000..a82cce01a29f5 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll @@ -0,0 +1,19 @@ +; RUN: opt -codegenprepare -S %s -o - | FileCheck %s +target triple = "thumbv7-apple-ios7.0.0" + + +%struct = type [1000 x i32] + +define void @test_dead_gep(%struct* %t0) { +; CHECK-LABEL: define void @test_dead_gep +; CHECK-NOT: getelementptr +; CHECK: %t16 = load i32, i32* undef +; CHECK: ret void + + %t12 = getelementptr inbounds %struct, %struct* %t0, i32 1, i32 500 + %t13 = load i32, i32* %t12, align 4 + %t14 = icmp eq i32 %t13, 2 + %t15 = select i1 %t14, i32* undef, i32* undef + %t16 = load i32, i32* %t15, align 4 + ret void +} diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll index b037bfaee7a21..8dfa09d477925 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll @@ -1,7 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -correlated-propagation -S | FileCheck %s -; CHECK-LABEL: @test0( +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + define void @test0(i32 %n) { +; CHECK-LABEL: @test0( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], 
[[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J_0]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[DIV1]] = udiv i32 [[J_0]], 2 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: br label %for.cond @@ -11,7 +26,6 @@ for.cond: ; preds = %for.body, %entry br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond -; CHECK: %div1 = udiv i32 %j.0, 2 %div = sdiv i32 %j.0, 2 br label %for.cond @@ -19,8 +33,20 @@ for.end: ; preds = %for.cond ret void } -; CHECK-LABEL: @test1( define void @test1(i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J_0]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[DIV]] = sdiv i32 [[J_0]], 2 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: br label %for.cond @@ -30,7 +56,6 @@ for.cond: ; preds = %for.body, %entry br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond -; CHECK: %div = sdiv i32 %j.0, 2 %div = sdiv i32 %j.0, 2 br label %for.cond @@ -38,14 +63,22 @@ for.end: ; preds = %for.cond ret void } -; CHECK-LABEL: @test2( define void @test2(i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[N]], 2 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %cmp = icmp sgt i32 %n, 1 br i1 %cmp, label %bb, label %exit bb: -; CHECK: %div1 = udiv i32 %n, 2 %div = sdiv i32 %n, 2 br label %exit @@ -57,14 +90,25 @@ exit: ; at the point of sdiv, we know that %a is always greater 
than 0, ; because of the guard before it, so we can transform it to udiv. declare void @llvm.experimental.guard(i1,...) -; CHECK-LABEL: @test4 define void @test4(i32 %n) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[EXIT:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[A]], 4 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND]]) [ "deopt"() ] +; CHECK-NEXT: [[DIV1]] = udiv i32 [[A]], 6 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %cmp = icmp sgt i32 %n, 0 br i1 %cmp, label %loop, label %exit loop: -; CHECK: udiv i32 %a, 6 %a = phi i32 [ %n, %entry ], [ %div, %loop ] %cond = icmp sgt i32 %a, 4 call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] @@ -77,14 +121,26 @@ exit: ; same test as above with assume instead of guard. 
declare void @llvm.assume(i1) -; CHECK-LABEL: @test5 define void @test5(i32 %n) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[EXIT:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[A]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) +; CHECK-NEXT: [[DIV1]] = udiv i32 [[A]], 6 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp sgt i32 [[DIV1]], 8 +; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %cmp = icmp sgt i32 %n, 0 br i1 %cmp, label %loop, label %exit loop: -; CHECK: udiv i32 %a, 6 %a = phi i32 [ %n, %entry ], [ %div, %loop ] %cond = icmp sgt i32 %a, 4 call void @llvm.assume(i1 %cond) @@ -95,3 +151,106 @@ loop: exit: ret void } + +; Now, let's try various domain combinations for operands. + +define i32 @test6_pos_pos(i32 %x, i32 %y) { +; CHECK-LABEL: @test6_pos_pos( +; CHECK-NEXT: [[C0:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV1]] +; + %c0 = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sge i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} +define i32 @test7_pos_neg(i32 %x, i32 %y) { +; CHECK-LABEL: @test7_pos_neg( +; CHECK-NEXT: [[C0:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[Y_NONNEG:%.*]] = sub i32 0, [[Y]] +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X]], [[Y_NONNEG]] +; CHECK-NEXT: [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]] +; CHECK-NEXT: ret i32 
[[DIV1_NEG]] +; + %c0 = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sle i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} +define i32 @test8_neg_pos(i32 %x, i32 %y) { +; CHECK-LABEL: @test8_neg_pos( +; CHECK-NEXT: [[C0:%.*]] = icmp sle i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[X_NONNEG:%.*]] = sub i32 0, [[X]] +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y]] +; CHECK-NEXT: [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]] +; CHECK-NEXT: ret i32 [[DIV1_NEG]] +; + %c0 = icmp sle i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sge i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} +define i32 @test9_neg_neg(i32 %x, i32 %y) { +; CHECK-LABEL: @test9_neg_neg( +; CHECK-NEXT: [[C0:%.*]] = icmp sle i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[X_NONNEG:%.*]] = sub i32 0, [[X]] +; CHECK-NEXT: [[Y_NONNEG:%.*]] = sub i32 0, [[Y]] +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y_NONNEG]] +; CHECK-NEXT: ret i32 [[DIV1]] +; + %c0 = icmp sle i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sle i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} + +; After making division unsigned, can we narrow it? 
+define i32 @test10_narrow(i32 %x, i32 %y) { +; CHECK-LABEL: @test10_narrow( +; CHECK-NEXT: [[C0:%.*]] = icmp ult i32 [[X:%.*]], 128 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[Y:%.*]], 128 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: [[DIV1_LHS_TRUNC:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[DIV1_RHS_TRUNC:%.*]] = trunc i32 [[Y]] to i8 +; CHECK-NEXT: [[DIV12:%.*]] = udiv i8 [[DIV1_LHS_TRUNC]], [[DIV1_RHS_TRUNC]] +; CHECK-NEXT: [[DIV1_ZEXT:%.*]] = zext i8 [[DIV12]] to i32 +; CHECK-NEXT: ret i32 [[DIV1_ZEXT]] +; + %c0 = icmp ult i32 %x, 128 + call void @llvm.assume(i1 %c0) + %c1 = icmp ult i32 %y, 128 + call void @llvm.assume(i1 %c1) + br label %end + +end: + %div = sdiv i32 %x, %y + ret i32 %div +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll index f4274a9e87f30..a4d3127d25f3d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -4,29 +4,6 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind -declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind -declare void @llvm.init.trampoline(i8*, i8*, i8*) - -; DSE should delete the dead trampoline. 
-declare void @test11f() -define void @test11() { -; CHECK-LABEL: @test11( -; CHECK-NEXT: ret void -; - %storage = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1] - %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0 ; [#uses=1] - call void @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null ) ; [#uses=1] - ret void -} - - -declare noalias i8* @malloc(i32) - -declare void @unknown_func() - ; Remove redundant store if loaded value is in another block inside a loop. define i32 @test31(i1 %c, i32* %p, i32 %i) { ; CHECK-LABEL: @test31( diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index ef785f10ffafc..0c83a750a6a88 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -181,6 +181,18 @@ define double @test10(i8* %X) { ret double %tmp.0 } +; DSE should delete the dead trampoline. +declare void @test11f() +define void @test11() { +; CHECK-LABEL: @test11( +; CHECK-NEXT: ret void +; + %storage = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1] + %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0 ; [#uses=1] + call void @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null ) ; [#uses=1] + ret void +} + ; %P doesn't escape, the DEAD instructions should be removed. 
declare void @test13f() define i32* @test13() { diff --git a/llvm/test/Transforms/FixIrreducible/unreachable.ll b/llvm/test/Transforms/FixIrreducible/unreachable.ll new file mode 100644 index 0000000000000..71cd81e01953e --- /dev/null +++ b/llvm/test/Transforms/FixIrreducible/unreachable.ll @@ -0,0 +1,24 @@ +; RUN: opt %s -fix-irreducible -S -o - | FileCheck %s + +; CHECK-LABEL: @unreachable( +; CHECK: entry: +; CHECK-NOT: irr.guard: +define void @unreachable(i32 %n) { +entry: + br label %loop.body + +loop.body: + br label %inner.block + +unreachable.block: + br label %inner.block + +inner.block: + br i1 undef, label %loop.exit, label %loop.latch + +loop.latch: + br label %loop.body + +loop.exit: + ret void +} diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll index 2a5414fbc07ce..a017dda926e3a 100644 --- a/llvm/test/Transforms/GVN/non-integral-pointers.ll +++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll @@ -169,7 +169,14 @@ define i8 addrspace(4)* @forward_store_zero2(i8 addrspace(4)* addrspace(4)* %loc ret i8 addrspace(4)* %ref } + + @NonZeroConstant = constant <4 x i64> +@NonZeroConstant2 = constant <4 x i64 addrspace(4)*> < + i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3), + i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3), + i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3), + i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)> @ZeroConstant = constant <4 x i64> zeroinitializer @@ -190,6 +197,54 @@ entry: ret i8 addrspace(4)* %ref } +define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) { +; CHECK-LABEL: @neg_forward_memcopy2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* +; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to 
i8*), i64 8, i1 false) +; CHECK-NEXT: [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]] +; CHECK-NEXT: ret i64 addrspace(4)* [[REF]] +; +entry: + %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)* + %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false) + %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc + ret i64 addrspace(4)* %ref +} + +; TODO: missed optimization +define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) { +; CHECK-LABEL: @forward_memcopy( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* +; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false) +; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc +; CHECK-NEXT: ret i8 addrspace(4)* [[REF]] +; +entry: + %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)* + %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false) + %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc + ret i8 addrspace(4)* %ref +} + +define i64 addrspace(4)* @forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) { +; CHECK-LABEL: @forward_memcopy2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* +; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false) +; CHECK-NEXT: ret i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3) +; +entry: + %loc.bc = bitcast i64 addrspace(4)* 
addrspace(4)* %loc to i8 addrspace(4)* + %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false) + %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc + ret i64 addrspace(4)* %ref +} + define <1 x i8 addrspace(4)*> @neg_forward_memcpy_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) { ; CHECK-LABEL: @neg_forward_memcpy_vload( ; CHECK-NEXT: entry: @@ -206,16 +261,62 @@ entry: ret <1 x i8 addrspace(4)*> %ref } +define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)*> addrspace(4)* %loc) { +; CHECK-LABEL: @neg_forward_memcpy_vload2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* +; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false) +; CHECK-NEXT: [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]] +; CHECK-NEXT: ret <4 x i64 addrspace(4)*> [[REF]] +; +entry: + %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)* + %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false) + %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc + ret <4 x i64 addrspace(4)*> %ref +} + +define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) { +; CHECK-LABEL: @neg_forward_memcpy_vload3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* +; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false) +; CHECK-NEXT: [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]] 
+; CHECK-NEXT: ret <4 x i64> [[REF]] +; +entry: + %loc.bc = bitcast <4 x i64> addrspace(4)* %loc to i8 addrspace(4)* + %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false) + %ref = load <4 x i64>, <4 x i64> addrspace(4)* %loc + ret <4 x i64> %ref +} + +define <1 x i64 addrspace(4)*> @forward_memcpy_vload3(<4 x i64 addrspace(4)*> addrspace(4)* %loc) { +; CHECK-LABEL: @forward_memcpy_vload3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* +; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false) +; CHECK-NEXT: ret <1 x i64 addrspace(4)*> +; +entry: + %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)* + %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false) + %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc + %val = extractelement <4 x i64 addrspace(4)*> %ref, i32 0 + %ret = insertelement <1 x i64 addrspace(4)*> undef, i64 addrspace(4)* %val, i32 0 + ret <1 x i64 addrspace(4)*> %ret +} ; Can forward since we can do so w/o breaking types -; TODO: missed optimization define i8 addrspace(4)* @forward_memcpy_zero(i8 addrspace(4)* addrspace(4)* %loc) { ; CHECK-LABEL: @forward_memcpy_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @ZeroConstant to i8*), i64 8, i1 false) -; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8 -; CHECK-NEXT: ret i8 
addrspace(4)* [[REF]] +; CHECK-NEXT: ret i8 addrspace(4)* null ; entry: %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)* diff --git a/llvm/test/Transforms/GlobalOpt/evaluate-call-errors.ll b/llvm/test/Transforms/GlobalOpt/evaluate-call-errors.ll index 88f7cbd8df1bd..84fe70b08b2ee 100644 --- a/llvm/test/Transforms/GlobalOpt/evaluate-call-errors.ll +++ b/llvm/test/Transforms/GlobalOpt/evaluate-call-errors.ll @@ -65,7 +65,7 @@ define linkonce_odr void @_ZN1SC2Ev(%struct.S*) unnamed_addr align 2 { } define internal %struct.Foo* @_ZL3foov() { - ret %struct.Foo* null + ret %struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1) } define linkonce_odr void @_ZN1QC2Ev(%struct.Q*) unnamed_addr align 2 { @@ -73,7 +73,7 @@ define linkonce_odr void @_ZN1QC2Ev(%struct.Q*) unnamed_addr align 2 { store %struct.Q* %0, %struct.Q** %2, align 8 %3 = load %struct.Q*, %struct.Q** %2, align 8 %4 = getelementptr inbounds %struct.Q, %struct.Q* %3, i32 0, i32 0 - %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* null) + %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1)) store i32 %5, i32* %4, align 4 ret void } diff --git a/llvm/test/Transforms/HardwareLoops/sibling-loops.ll b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll new file mode 100644 index 0000000000000..e415e522da7b7 --- /dev/null +++ b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEC + +define arm_aapcs_vfpcc void @test(i16* noalias nocapture readonly %off, i16* noalias nocapture %data, i16* noalias nocapture %dst, i32 %n) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[CMP252:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP252]], label [[FOR_COND1_PREHEADER_US:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond1.preheader.us: +; CHECK-NEXT: [[I_057_US:%.*]] = phi i32 [ [[INC29_US:%.*]], [[FOR_COND_CLEANUP14_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[I_057_US]], [[N]] +; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) +; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] +; CHECK: for.body4.us: +; CHECK-NEXT: [[J_053_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i16, i16* [[OFF:%.*]], i32 [[J_053_US]] +; CHECK-NEXT: [[L2:%.*]] = load i16, i16* [[ARRAYIDX_US]], align 2 +; CHECK-NEXT: [[ARRAYIDX5_US:%.*]] = getelementptr inbounds i16, i16* [[DATA:%.*]], i32 [[J_053_US]] +; CHECK-NEXT: [[L3:%.*]] = load i16, i16* [[ARRAYIDX5_US]], align 2 +; CHECK-NEXT: [[ADD_US:%.*]] = add i16 [[L3]], [[L2]] +; CHECK-NEXT: [[ADD8_US:%.*]] = add i32 [[J_053_US]], [[MUL_US]] +; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i16, i16* [[DATA]], i32 [[ADD8_US]] +; CHECK-NEXT: store i16 [[ADD_US]], i16* [[ARRAYIDX9_US]], align 2 +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[J_053_US]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_BODY4_US]], label [[FOR_BODY15_US_PREHEADER:%.*]] +; CHECK: for.body15.us.preheader: +; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) +; CHECK-NEXT: br label [[FOR_BODY15_US:%.*]] +; CHECK: for.body15.us: +; CHECK-NEXT: [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ 0, [[FOR_BODY15_US_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX16_US:%.*]] = getelementptr inbounds i16, i16* [[OFF]], i32 [[J10_055_US]] +; CHECK-NEXT: [[L0:%.*]] = load i16, i16* [[ARRAYIDX16_US]], align 2 +; CHECK-NEXT: [[ARRAYIDX18_US:%.*]] = getelementptr inbounds i16, i16* [[DATA]], i32 
[[J10_055_US]] +; CHECK-NEXT: [[L1:%.*]] = load i16, i16* [[ARRAYIDX18_US]], align 2 +; CHECK-NEXT: [[ADD20_US:%.*]] = add i16 [[L1]], [[L0]] +; CHECK-NEXT: [[ADD23_US:%.*]] = add i32 [[J10_055_US]], [[MUL_US]] +; CHECK-NEXT: [[ARRAYIDX24_US:%.*]] = getelementptr inbounds i16, i16* [[DST:%.*]], i32 [[ADD23_US]] +; CHECK-NEXT: store i16 [[ADD20_US]], i16* [[ARRAYIDX24_US]], align 2 +; CHECK-NEXT: [[INC26_US]] = add nuw nsw i32 [[J10_055_US]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY15_US]], label [[FOR_COND_CLEANUP14_US]] +; CHECK: for.cond.cleanup14.us: +; CHECK-NEXT: [[INC29_US]] = add nuw i32 [[I_057_US]], 1 +; CHECK-NEXT: [[EXITCOND94:%.*]] = icmp eq i32 [[INC29_US]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND94]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + %cmp252 = icmp sgt i32 %n, 0 + br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup + +for.cond1.preheader.us: ; preds = %entry, %for.cond.cleanup14.us + %i.057.us = phi i32 [ %inc29.us, %for.cond.cleanup14.us ], [ 0, %entry ] + %mul.us = mul i32 %i.057.us, %n + br label %for.body4.us + +for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us + %j.053.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] + %arrayidx.us = getelementptr inbounds i16, i16* %off, i32 %j.053.us + %l2 = load i16, i16* %arrayidx.us, align 2 + %arrayidx5.us = getelementptr inbounds i16, i16* %data, i32 %j.053.us + %l3 = load i16, i16* %arrayidx5.us, align 2 + %add.us = add i16 %l3, %l2 + %add8.us = add i32 %j.053.us, %mul.us + %arrayidx9.us = getelementptr inbounds i16, i16* %data, i32 %add8.us + store i16 %add.us, i16* %arrayidx9.us, align 2 + %inc.us = add nuw nsw i32 %j.053.us, 1 + %exitcond = icmp eq i32 %inc.us, %n + br i1 %exitcond, label %for.body15.us, label %for.body4.us + +for.body15.us: ; preds = %for.body4.us, %for.body15.us + 
%j10.055.us = phi i32 [ %inc26.us, %for.body15.us ], [ 0, %for.body4.us ] + %arrayidx16.us = getelementptr inbounds i16, i16* %off, i32 %j10.055.us + %l0 = load i16, i16* %arrayidx16.us, align 2 + %arrayidx18.us = getelementptr inbounds i16, i16* %data, i32 %j10.055.us + %l1 = load i16, i16* %arrayidx18.us, align 2 + %add20.us = add i16 %l1, %l0 + %add23.us = add i32 %j10.055.us, %mul.us + %arrayidx24.us = getelementptr inbounds i16, i16* %dst, i32 %add23.us + store i16 %add20.us, i16* %arrayidx24.us, align 2 + %inc26.us = add nuw nsw i32 %j10.055.us, 1 + %exitcond93 = icmp eq i32 %inc26.us, %n + br i1 %exitcond93, label %for.cond.cleanup14.us, label %for.body15.us + +for.cond.cleanup14.us: ; preds = %for.body15.us + %inc29.us = add nuw i32 %i.057.us, 1 + %exitcond94 = icmp eq i32 %inc29.us, %n + br i1 %exitcond94, label %for.cond.cleanup, label %for.cond1.preheader.us + +for.cond.cleanup: ; preds = %for.cond.cleanup14.us, %entry + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll index 3acd21c739585..1fa4bdc1964e8 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll @@ -221,7 +221,7 @@ define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null } ; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr( -; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* undef), align 4 +; CHECK: store i32 7, i32 addrspace(3)* null define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4 ret void diff --git 
a/llvm/test/Transforms/Inline/ML/Inputs/size-estimator.ll b/llvm/test/Transforms/Inline/ML/Inputs/size-estimator.ll new file mode 100644 index 0000000000000..b13595f355fb4 --- /dev/null +++ b/llvm/test/Transforms/Inline/ML/Inputs/size-estimator.ll @@ -0,0 +1,28 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare i32 @f1(i32) +declare i32 @f2(i32) + +define i32 @branches(i32) { + %cond = icmp slt i32 %0, 3 + br i1 %cond, label %then, label %else + +then: + %ret.1 = call i32 @f1(i32 %0) + br label %last.block + +else: + %ret.2 = call i32 @f2(i32 %0) + br label %last.block + +last.block: + %ret = phi i32 [%ret.1, %then], [%ret.2, %else] + ret i32 %ret +} + +define internal i32 @top() { + %1 = call i32 @branches(i32 2) + %2 = call i32 @f1(i32 %1) + ret i32 %2 +} \ No newline at end of file diff --git a/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll b/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll index b8279e5db6a02..d01f4bb301a86 100644 --- a/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll +++ b/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll @@ -61,4 +61,8 @@ define i32 @switcher(i32) { ;